rapidata 2.33.1__py3-none-any.whl → 2.33.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +2 -2
- rapidata/rapidata_client/__init__.py +1 -1
- rapidata/rapidata_client/benchmark/participant/__init__.py +0 -0
- rapidata/rapidata_client/benchmark/participant/_participant.py +102 -0
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py +25 -23
- rapidata/rapidata_client/selection/__init__.py +1 -1
- rapidata/rapidata_client/selection/effort_selection.py +9 -2
- rapidata/service/openapi_service.py +5 -0
- {rapidata-2.33.1.dist-info → rapidata-2.33.2.dist-info}/METADATA +1 -1
- {rapidata-2.33.1.dist-info → rapidata-2.33.2.dist-info}/RECORD +12 -10
- {rapidata-2.33.1.dist-info → rapidata-2.33.2.dist-info}/LICENSE +0 -0
- {rapidata-2.33.1.dist-info → rapidata-2.33.2.dist-info}/WHEEL +0 -0
rapidata/__init__.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
__version__ = "2.33.1"
|
|
1
|
+
__version__ = "2.33.2"
|
|
2
2
|
|
|
3
3
|
from .rapidata_client import (
|
|
4
4
|
RapidataClient,
|
|
5
5
|
DemographicSelection,
|
|
6
6
|
LabelingSelection,
|
|
7
|
-
|
|
7
|
+
EffortSelection,
|
|
8
8
|
RetrievalMode,
|
|
9
9
|
ValidationSelection,
|
|
10
10
|
ConditionalValidationSelection,
|
|
File without changes
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
2
|
+
from tqdm import tqdm
|
|
3
|
+
|
|
4
|
+
from rapidata.rapidata_client.datapoints.assets import MediaAsset
|
|
5
|
+
from rapidata.rapidata_client.logging import logger
|
|
6
|
+
from rapidata.rapidata_client.logging.output_manager import RapidataOutputManager
|
|
7
|
+
from rapidata.api_client.models.create_sample_model import CreateSampleModel
|
|
8
|
+
from rapidata.service.openapi_service import OpenAPIService
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BenchmarkParticipant:
    """
    A participant of a benchmark that can upload its media samples.

    Stores the participant's name and id and sends each sample through the
    participant API exposed by the supplied ``OpenAPIService``.

    Args:
        name: Name of the participant.
        id: Id of the participant; sent as ``participant_id`` with every sample.
        openapi_service: Service whose ``participant_api`` performs the uploads.
    """

    def __init__(self, name: str, id: str, openapi_service: OpenAPIService):
        self.name = name
        self.id = id
        self.__openapi_service = openapi_service

    def _process_single_sample_upload(
        self,
        asset: MediaAsset,
        identifier: str,
    ) -> tuple[MediaAsset | None, MediaAsset | None]:
        """
        Upload a single sample and report whether it succeeded.

        A single attempt is made; there is no retry. Any exception raised while
        preparing or sending the sample is logged and reported as a failure
        instead of being propagated.

        Args:
            asset: MediaAsset to upload. Local assets are sent as a file,
                all others by their path as a URL.
            identifier: Identifier for the sample.

        Returns:
            tuple[MediaAsset | None, MediaAsset | None]:
                ``(asset, None)`` on success, ``(None, asset)`` on failure.
        """
        try:
            # Preparing the payload happens inside the try block so that a
            # failure here is tracked per sample like any other upload error.
            if asset.is_local():
                files = [asset.to_file()]
                urls = []
            else:
                files = []
                urls = [asset.path]

            self.__openapi_service.participant_api.participant_participant_id_sample_post(
                participant_id=self.id,
                model=CreateSampleModel(
                    identifier=identifier
                ),
                files=files,
                urls=urls
            )
        except Exception as e:
            logger.error(f"Upload failed for {identifier}. Error: {str(e)}")
            return None, asset

        return asset, None

    def upload_media(
        self,
        assets: list[MediaAsset],
        identifiers: list[str],
        max_workers: int = 10,
    ) -> tuple[list[MediaAsset], list[MediaAsset]]:
        """
        Upload samples concurrently with error tracking and a progress bar.

        Args:
            assets: List of MediaAsset objects to upload.
            identifiers: List of identifiers; ``identifiers[i]`` belongs to ``assets[i]``.
            max_workers: Maximum number of concurrent upload workers.

        Returns:
            tuple[list[MediaAsset], list[MediaAsset]]:
                The successfully uploaded assets and the failed assets, each in
                completion order (not input order).

        Raises:
            ValueError: If ``assets`` and ``identifiers`` differ in length.
        """
        # zip() would silently drop the surplus items, leaving them in neither
        # result list, so a length mismatch is rejected up front.
        if len(assets) != len(identifiers):
            raise ValueError(
                f"Number of assets ({len(assets)}) must match number of identifiers ({len(identifiers)})."
            )

        # Nothing to upload: skip the thread pool and the progress bar.
        if not assets:
            return [], []

        successful_uploads: list[MediaAsset] = []
        failed_uploads: list[MediaAsset] = []

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Remember which asset each future belongs to so an unexpected
            # failure of the future can still be attributed to its asset.
            future_to_asset = {
                executor.submit(
                    self._process_single_sample_upload,
                    asset,
                    identifier,
                ): asset
                for asset, identifier in zip(assets, identifiers)
            }

            with tqdm(total=len(future_to_asset), desc="Uploading media", disable=RapidataOutputManager.silent_mode) as pbar:
                for future in as_completed(future_to_asset):
                    try:
                        successful_asset, failed_asset = future.result()
                    except Exception as e:
                        logger.error(f"Future execution failed: {str(e)}")
                        failed_uploads.append(future_to_asset[future])
                    else:
                        # Compare against None explicitly instead of relying on
                        # the truthiness of the asset object.
                        if successful_asset is not None:
                            successful_uploads.append(successful_asset)
                        if failed_asset is not None:
                            failed_uploads.append(failed_asset)

                    pbar.update(1)

        return successful_uploads, failed_uploads
|
|
@@ -11,14 +11,13 @@ from rapidata.api_client.models.url_asset_input import UrlAssetInput
|
|
|
11
11
|
from rapidata.api_client.models.file_asset_model import FileAssetModel
|
|
12
12
|
from rapidata.api_client.models.source_url_metadata_model import SourceUrlMetadataModel
|
|
13
13
|
|
|
14
|
+
|
|
15
|
+
from rapidata.rapidata_client.benchmark.participant._participant import BenchmarkParticipant
|
|
14
16
|
from rapidata.rapidata_client.logging import logger
|
|
15
17
|
from rapidata.service.openapi_service import OpenAPIService
|
|
16
18
|
|
|
17
19
|
from rapidata.rapidata_client.benchmark.leaderboard.rapidata_leaderboard import RapidataLeaderboard
|
|
18
|
-
from rapidata.rapidata_client.datapoints.metadata import PromptIdentifierMetadata
|
|
19
20
|
from rapidata.rapidata_client.datapoints.assets import MediaAsset
|
|
20
|
-
from rapidata.rapidata_client.order._rapidata_dataset import RapidataDataset
|
|
21
|
-
from rapidata.rapidata_client.datapoints.datapoint import Datapoint
|
|
22
21
|
|
|
23
22
|
class RapidataBenchmark:
|
|
24
23
|
"""
|
|
@@ -250,7 +249,7 @@ class RapidataBenchmark:
|
|
|
250
249
|
leaderboard_result.id,
|
|
251
250
|
self.__openapi_service
|
|
252
251
|
)
|
|
253
|
-
|
|
252
|
+
|
|
254
253
|
def evaluate_model(self, name: str, media: list[str], identifiers: list[str]) -> None:
|
|
255
254
|
"""
|
|
256
255
|
Evaluates a model on the benchmark across all leaderboards.
|
|
@@ -272,11 +271,9 @@ class RapidataBenchmark:
|
|
|
272
271
|
\nTo see the prompts that are associated with the identifiers, use the prompts property.")
|
|
273
272
|
|
|
274
273
|
# happens before the creation of the participant to ensure all media paths are valid
|
|
275
|
-
assets = []
|
|
276
|
-
|
|
277
|
-
for media_path, identifier in zip(media, identifiers):
|
|
274
|
+
assets: list[MediaAsset] = []
|
|
275
|
+
for media_path in media:
|
|
278
276
|
assets.append(MediaAsset(media_path))
|
|
279
|
-
prompts_metadata.append([PromptIdentifierMetadata(identifier=identifier)])
|
|
280
277
|
|
|
281
278
|
participant_result = self.__openapi_service.benchmark_api.benchmark_benchmark_id_participants_post(
|
|
282
279
|
benchmark_id=self.id,
|
|
@@ -285,22 +282,27 @@ class RapidataBenchmark:
|
|
|
285
282
|
)
|
|
286
283
|
)
|
|
287
284
|
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
try:
|
|
291
|
-
dataset.add_datapoints([Datapoint(asset=asset, metadata=metadata) for asset, metadata in zip(assets, prompts_metadata)])
|
|
292
|
-
except Exception as e:
|
|
293
|
-
logger.warning(f"An error occurred while adding datapoints to the dataset: {e}")
|
|
294
|
-
upload_progress = self.__openapi_service.dataset_api.dataset_dataset_id_progress_get(
|
|
295
|
-
dataset_id=dataset.id
|
|
296
|
-
)
|
|
297
|
-
if upload_progress.ready == 0:
|
|
298
|
-
raise RuntimeError("None of the media was uploaded successfully. Please check the media paths and try again.")
|
|
299
|
-
|
|
300
|
-
logger.warning(f"{upload_progress.failed} datapoints failed to upload. \n{upload_progress.ready} datapoints were uploaded successfully. \nEvaluation will continue with the uploaded datapoints.")
|
|
285
|
+
logger.info(f"Participant created: {participant_result.participant_id}")
|
|
301
286
|
|
|
302
|
-
self.__openapi_service
|
|
303
|
-
|
|
287
|
+
participant = BenchmarkParticipant(name, participant_result.participant_id, self.__openapi_service)
|
|
288
|
+
|
|
289
|
+
successful_uploads, failed_uploads = participant.upload_media(
|
|
290
|
+
assets,
|
|
291
|
+
identifiers,
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
total_uploads = len(assets)
|
|
295
|
+
success_rate = (len(successful_uploads) / total_uploads * 100) if total_uploads > 0 else 0
|
|
296
|
+
logger.info(f"Upload complete: {len(successful_uploads)} successful, {len(failed_uploads)} failed ({success_rate:.1f}% success rate)")
|
|
297
|
+
|
|
298
|
+
if failed_uploads:
|
|
299
|
+
logger.error(f"Failed uploads for media: {[asset.path for asset in failed_uploads]}")
|
|
300
|
+
logger.warning("Some uploads failed. The model evaluation may be incomplete.")
|
|
301
|
+
|
|
302
|
+
if len(successful_uploads) == 0:
|
|
303
|
+
raise RuntimeError("No uploads were successful. The model evaluation will not be completed.")
|
|
304
|
+
|
|
305
|
+
self.__openapi_service.participant_api.participants_participant_id_submit_post(
|
|
304
306
|
participant_id=participant_result.participant_id
|
|
305
307
|
)
|
|
306
308
|
|
|
@@ -8,4 +8,4 @@ from .shuffling_selection import ShufflingSelection
|
|
|
8
8
|
from .ab_test_selection import AbTestSelection
|
|
9
9
|
from .static_selection import StaticSelection
|
|
10
10
|
from .retrieval_modes import RetrievalMode
|
|
11
|
-
from .effort_selection import
|
|
11
|
+
from .effort_selection import EffortSelection
|
|
@@ -3,9 +3,16 @@ from rapidata.api_client.models.effort_capped_selection import EffortCappedSelec
|
|
|
3
3
|
from rapidata.rapidata_client.selection.retrieval_modes import RetrievalMode
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
class
|
|
7
|
-
|
|
6
|
+
class EffortSelection(RapidataSelection):
|
|
7
|
+
"""
|
|
8
|
+
With this selection you can define the effort budget you have for a task.
|
|
9
|
+
As an example, you have a task that takes 10 seconds to complete. The effort budget would be 10.
|
|
8
10
|
|
|
11
|
+
Args:
|
|
12
|
+
effort_budget (int): The effort budget for the task.
|
|
13
|
+
retrieval_mode (RetrievalMode): The retrieval mode for the task.
|
|
14
|
+
max_iterations (int | None): The maximum number of iterations for the task.
|
|
15
|
+
"""
|
|
9
16
|
def __init__(self, effort_budget: int, retrieval_mode: RetrievalMode = RetrievalMode.Shuffled, max_iterations: int | None = None):
|
|
10
17
|
self.effort_budget = effort_budget
|
|
11
18
|
self.retrieval_mode = retrieval_mode
|
|
@@ -10,6 +10,7 @@ from rapidata.api_client.api.rapid_api import RapidApi
|
|
|
10
10
|
from rapidata.api_client.api.leaderboard_api import LeaderboardApi
|
|
11
11
|
from rapidata.api_client.api.validation_set_api import ValidationSetApi
|
|
12
12
|
from rapidata.api_client.api.workflow_api import WorkflowApi
|
|
13
|
+
from rapidata.api_client.api.participant_api import ParticipantApi
|
|
13
14
|
from rapidata.api_client.configuration import Configuration
|
|
14
15
|
from rapidata.service.credential_manager import CredentialManager
|
|
15
16
|
from rapidata.rapidata_client.api.rapidata_exception import RapidataApiClient
|
|
@@ -117,6 +118,10 @@ class OpenAPIService:
|
|
|
117
118
|
@property
|
|
118
119
|
def benchmark_api(self) -> BenchmarkApi:
|
|
119
120
|
return BenchmarkApi(self.api_client)
|
|
121
|
+
|
|
122
|
+
@property
|
|
123
|
+
def participant_api(self) -> ParticipantApi:
|
|
124
|
+
return ParticipantApi(self.api_client)
|
|
120
125
|
|
|
121
126
|
def _get_rapidata_package_version(self):
|
|
122
127
|
"""
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
rapidata/__init__.py,sha256=
|
|
1
|
+
rapidata/__init__.py,sha256=w07T5zXe_HttCELcde6bMq0eSbESIQnAIm6zEPLdMg8,897
|
|
2
2
|
rapidata/api_client/__init__.py,sha256=tNSCpLEs-AyEZGCAYz9MM8gDEpA4KJKcdNL-dcvAAw0,34404
|
|
3
3
|
rapidata/api_client/api/__init__.py,sha256=qjLeeJSnuPF_ar_nLknjnOqStBQnoCiz-O_rfZUBZrE,1489
|
|
4
4
|
rapidata/api_client/api/benchmark_api.py,sha256=fr4krx4f3yN--DswD_Prpz-KU81ooG3Lcy-30_KU0dw,129751
|
|
@@ -533,13 +533,15 @@ rapidata/api_client/models/workflow_state.py,sha256=5LAK1se76RCoozeVB6oxMPb8p_5b
|
|
|
533
533
|
rapidata/api_client/models/zip_entry_file_wrapper.py,sha256=06CoNJD3x511K3rnSmkrwwhc9GbQxwxF-c0ldOyJbAs,4240
|
|
534
534
|
rapidata/api_client/rest.py,sha256=rtIMcgINZOUaDFaJIinJkXRSddNJmXvMRMfgO2Ezk2o,10835
|
|
535
535
|
rapidata/api_client_README.md,sha256=sj425Ki-qiO2DCHnJ06r9LjfnGir7UpgXEonMh-LFag,62126
|
|
536
|
-
rapidata/rapidata_client/__init__.py,sha256=
|
|
536
|
+
rapidata/rapidata_client/__init__.py,sha256=CfkQxCdURXzJsVP6sxKmufze2u-IE_snG_G8NEkE_JM,1225
|
|
537
537
|
rapidata/rapidata_client/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
538
538
|
rapidata/rapidata_client/api/rapidata_exception.py,sha256=BIdmHRrJUGW-Mqhp1H_suemZaR6w9TgjWq-ZW5iUPdQ,3878
|
|
539
539
|
rapidata/rapidata_client/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
540
540
|
rapidata/rapidata_client/benchmark/leaderboard/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
541
541
|
rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py,sha256=BDI0xJkTumbZy4dYqkzXy074jC9eaVWoJJDZ84uvatE,3906
|
|
542
|
-
rapidata/rapidata_client/benchmark/
|
|
542
|
+
rapidata/rapidata_client/benchmark/participant/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
543
|
+
rapidata/rapidata_client/benchmark/participant/_participant.py,sha256=yN82EWrZXYszsM8Ns0HRMXCTivltkyxcpGRK-cdT01Y,3683
|
|
544
|
+
rapidata/rapidata_client/benchmark/rapidata_benchmark.py,sha256=03MXV3FWSWfm2iyOifXt-43wruzIPQenNBPmGZGKdds,13328
|
|
543
545
|
rapidata/rapidata_client/benchmark/rapidata_benchmark_manager.py,sha256=_0ot7zRj1de5admMO7NZ7qVSCkwTYu6xOZI5dUkITuI,4592
|
|
544
546
|
rapidata/rapidata_client/country_codes/__init__.py,sha256=FB9Dcks44J6C6YBSYmTmNZ71tE130x6NO_3aLJ8fKzQ,40
|
|
545
547
|
rapidata/rapidata_client/country_codes/country_codes.py,sha256=ePHqeb7y9DWQZAnddBzPx1puYBcrgUjdR2sbFijuFD8,283
|
|
@@ -596,13 +598,13 @@ rapidata/rapidata_client/referee/__init__.py,sha256=q0Hv9nmfEpyChejtyMLT8hWKL0vT
|
|
|
596
598
|
rapidata/rapidata_client/referee/_base_referee.py,sha256=MdFOhdxt3sRnWXLDKLJZKFdVpjBGn9jypPnWWQ6msQA,496
|
|
597
599
|
rapidata/rapidata_client/referee/_early_stopping_referee.py,sha256=ULbokQZ91wc9D_20qHUhe55D28D9eTY1J1cMp_-oIDc,2088
|
|
598
600
|
rapidata/rapidata_client/referee/_naive_referee.py,sha256=PVR8uy8hfRjr2DBzdOFyvou6S3swNc-4UvgjhO-09TU,1209
|
|
599
|
-
rapidata/rapidata_client/selection/__init__.py,sha256=
|
|
601
|
+
rapidata/rapidata_client/selection/__init__.py,sha256=1QsMUieM-oleyOXX2mOhP36_P4h1foIIr3rW8WT8Eg0,564
|
|
600
602
|
rapidata/rapidata_client/selection/_base_selection.py,sha256=tInbWOgxT_4CHkr5QHoG55ZcUi1ZmfcEGIwLKKCnN20,147
|
|
601
603
|
rapidata/rapidata_client/selection/ab_test_selection.py,sha256=fymubkVMawqJmYp9FKzWXTki9tgBgoj3cOP8rG9oOd0,1284
|
|
602
604
|
rapidata/rapidata_client/selection/capped_selection.py,sha256=iWhbM1LcayhgFm7oKADXCaKHGdiQIupI0jbYuuEVM2A,1184
|
|
603
605
|
rapidata/rapidata_client/selection/conditional_validation_selection.py,sha256=OcNYSBi19vIcy2bLDmj9cv-gg5LFSvdjc3tooV0Z7Oc,2842
|
|
604
606
|
rapidata/rapidata_client/selection/demographic_selection.py,sha256=l4vnNbzlf9ED6BKqN4k5cZXShkXu9L1C5DtO78Vwr5M,1454
|
|
605
|
-
rapidata/rapidata_client/selection/effort_selection.py,sha256=
|
|
607
|
+
rapidata/rapidata_client/selection/effort_selection.py,sha256=1p4CtwVJIyf4HZ-mPn0ohloe9dBxJFLhStG6jQNuxnE,1266
|
|
606
608
|
rapidata/rapidata_client/selection/labeling_selection.py,sha256=0X8DJHgwvgwekEbzVxWPyzZ1QAPcULZNDjfLQYUlcLM,1348
|
|
607
609
|
rapidata/rapidata_client/selection/rapidata_selections.py,sha256=lgwRivdzSnCri3K-Z-ngqR5RXwTl7iYuKTRpuyl5UMY,1853
|
|
608
610
|
rapidata/rapidata_client/selection/retrieval_modes.py,sha256=J2jzPEJ4wdllm_RnU_FYPh3eO3xeZS7QUk-NXgTB2u4,668
|
|
@@ -642,8 +644,8 @@ rapidata/rapidata_client/workflow/_timestamp_workflow.py,sha256=tPi2zu1-SlE_ppbG
|
|
|
642
644
|
rapidata/service/__init__.py,sha256=s9bS1AJZaWIhLtJX_ZA40_CK39rAAkwdAmymTMbeWl4,68
|
|
643
645
|
rapidata/service/credential_manager.py,sha256=pUEEtp6VrFWYhfUUtyqmS0AlRqe2Y0kFkY6o22IT4KM,8682
|
|
644
646
|
rapidata/service/local_file_service.py,sha256=pgorvlWcx52Uh3cEG6VrdMK_t__7dacQ_5AnfY14BW8,877
|
|
645
|
-
rapidata/service/openapi_service.py,sha256=
|
|
646
|
-
rapidata-2.33.
|
|
647
|
-
rapidata-2.33.
|
|
648
|
-
rapidata-2.33.
|
|
649
|
-
rapidata-2.33.
|
|
647
|
+
rapidata/service/openapi_service.py,sha256=v2fhPbHmD0J11ZRZY6f80PdIdGwpRFlbfMH9t8Ypg5A,5403
|
|
648
|
+
rapidata-2.33.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
649
|
+
rapidata-2.33.2.dist-info/METADATA,sha256=YkByWAPnAWFN9E9m-9KbNDsgj9kV2kXuQcqA7raN2N0,1264
|
|
650
|
+
rapidata-2.33.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
651
|
+
rapidata-2.33.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|