rapidata 2.33.1__py3-none-any.whl → 2.33.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rapidata might be problematic. Click here for more details.

rapidata/__init__.py CHANGED
@@ -1,10 +1,10 @@
1
- __version__ = "2.33.1"
1
+ __version__ = "2.33.2"
2
2
 
3
3
  from .rapidata_client import (
4
4
  RapidataClient,
5
5
  DemographicSelection,
6
6
  LabelingSelection,
7
- EffortEstimationSelection,
7
+ EffortSelection,
8
8
  RetrievalMode,
9
9
  ValidationSelection,
10
10
  ConditionalValidationSelection,
@@ -7,7 +7,7 @@ from .selection import (
7
7
  CappedSelection,
8
8
  ShufflingSelection,
9
9
  RetrievalMode,
10
- EffortEstimationSelection,
10
+ EffortSelection,
11
11
  )
12
12
  from .datapoints import Datapoint
13
13
  from .datapoints.metadata import (
@@ -0,0 +1,102 @@
1
+ from concurrent.futures import ThreadPoolExecutor, as_completed
2
+ from tqdm import tqdm
3
+
4
+ from rapidata.rapidata_client.datapoints.assets import MediaAsset
5
+ from rapidata.rapidata_client.logging import logger
6
+ from rapidata.rapidata_client.logging.output_manager import RapidataOutputManager
7
+ from rapidata.api_client.models.create_sample_model import CreateSampleModel
8
+ from rapidata.service.openapi_service import OpenAPIService
9
+
10
+
11
class BenchmarkParticipant:
    """
    A benchmark participant that media samples can be uploaded to.

    Args:
        name: Name of the participant.
        id: Id of the participant; sent as ``participant_id`` with every sample upload.
        openapi_service: Service whose ``participant_api`` is used to upload the samples.
    """

    def __init__(self, name: str, id: str, openapi_service: OpenAPIService):
        self.name = name
        self.id = id
        self.__openapi_service = openapi_service

    def _process_single_sample_upload(
        self,
        asset: MediaAsset,
        identifier: str,
    ) -> tuple[MediaAsset | None, MediaAsset | None]:
        """
        Upload a single sample and report the outcome through the return value.

        The upload is attempted exactly once (there is no retry). Any exception
        raised while preparing the asset or while calling the API is logged and
        turned into a "failed" result instead of being propagated.

        Args:
            asset: MediaAsset to upload. Local assets are sent as files, all
                other assets are sent by their path as a URL.
            identifier: Identifier for the sample

        Returns:
            tuple[MediaAsset | None, MediaAsset | None]: ``(asset, None)`` when the
            upload succeeded, ``(None, asset)`` when it failed.
        """
        try:
            # Preparing the payload happens inside the try block on purpose: a
            # failure in to_file() must mark this asset as failed rather than
            # escape into the future and leave the asset unaccounted for.
            if asset.is_local():
                files = [asset.to_file()]
                urls = []
            else:
                files = []
                urls = [asset.path]

            self.__openapi_service.participant_api.participant_participant_id_sample_post(
                participant_id=self.id,
                model=CreateSampleModel(
                    identifier=identifier
                ),
                files=files,
                urls=urls
            )
        except Exception as e:
            logger.error(f"Upload failed for {identifier}. Error: {str(e)}")
            return None, asset

        return asset, None

    def upload_media(
        self,
        assets: list[MediaAsset],
        identifiers: list[str],
        max_workers: int = 10,
    ) -> tuple[list[MediaAsset], list[MediaAsset]]:
        """
        Upload samples concurrently with error handling and progress tracking.

        Results are collected in completion order, which is not necessarily the
        order of ``assets``.

        Args:
            assets: List of MediaAsset objects to upload
            identifiers: List of identifiers matching the assets (same length, same order)
            max_workers: Maximum number of concurrent upload workers

        Returns:
            tuple[list[MediaAsset], list[MediaAsset]]: The assets that were uploaded
            successfully and the assets whose upload failed.

        Raises:
            ValueError: If ``assets`` and ``identifiers`` differ in length.
        """
        # zip() would silently drop the unmatched tail, so reject a mismatch
        # before any upload has been started.
        if len(assets) != len(identifiers):
            raise ValueError(
                f"Number of assets ({len(assets)}) does not match number of identifiers ({len(identifiers)})."
            )

        successful_uploads: list[MediaAsset] = []
        failed_uploads: list[MediaAsset] = []

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Remember which asset each future belongs to so that an unexpected
            # worker failure can still be attributed to its asset.
            future_to_asset = {
                executor.submit(
                    self._process_single_sample_upload,
                    asset,
                    identifier,
                ): asset
                for asset, identifier in zip(assets, identifiers)
            }

            with tqdm(total=len(future_to_asset), desc="Uploading media", disable=RapidataOutputManager.silent_mode) as pbar:
                for future in as_completed(future_to_asset):
                    try:
                        uploaded_asset, failed_asset = future.result()
                    except Exception as e:
                        logger.error(f"Future execution failed: {str(e)}")
                        failed_uploads.append(future_to_asset[future])
                    else:
                        # Compare against None explicitly; the truthiness of a
                        # MediaAsset is not something this code should rely on.
                        if uploaded_asset is not None:
                            successful_uploads.append(uploaded_asset)
                        if failed_asset is not None:
                            failed_uploads.append(failed_asset)

                    pbar.update(1)

        return successful_uploads, failed_uploads
@@ -11,14 +11,13 @@ from rapidata.api_client.models.url_asset_input import UrlAssetInput
11
11
  from rapidata.api_client.models.file_asset_model import FileAssetModel
12
12
  from rapidata.api_client.models.source_url_metadata_model import SourceUrlMetadataModel
13
13
 
14
+
15
+ from rapidata.rapidata_client.benchmark.participant._participant import BenchmarkParticipant
14
16
  from rapidata.rapidata_client.logging import logger
15
17
  from rapidata.service.openapi_service import OpenAPIService
16
18
 
17
19
  from rapidata.rapidata_client.benchmark.leaderboard.rapidata_leaderboard import RapidataLeaderboard
18
- from rapidata.rapidata_client.datapoints.metadata import PromptIdentifierMetadata
19
20
  from rapidata.rapidata_client.datapoints.assets import MediaAsset
20
- from rapidata.rapidata_client.order._rapidata_dataset import RapidataDataset
21
- from rapidata.rapidata_client.datapoints.datapoint import Datapoint
22
21
 
23
22
  class RapidataBenchmark:
24
23
  """
@@ -250,7 +249,7 @@ class RapidataBenchmark:
250
249
  leaderboard_result.id,
251
250
  self.__openapi_service
252
251
  )
253
-
252
+
254
253
  def evaluate_model(self, name: str, media: list[str], identifiers: list[str]) -> None:
255
254
  """
256
255
  Evaluates a model on the benchmark across all leaderboards.
@@ -272,11 +271,9 @@ class RapidataBenchmark:
272
271
  \nTo see the prompts that are associated with the identifiers, use the prompts property.")
273
272
 
274
273
  # happens before the creation of the participant to ensure all media paths are valid
275
- assets = []
276
- prompts_metadata: list[list[PromptIdentifierMetadata]] = []
277
- for media_path, identifier in zip(media, identifiers):
274
+ assets: list[MediaAsset] = []
275
+ for media_path in media:
278
276
  assets.append(MediaAsset(media_path))
279
- prompts_metadata.append([PromptIdentifierMetadata(identifier=identifier)])
280
277
 
281
278
  participant_result = self.__openapi_service.benchmark_api.benchmark_benchmark_id_participants_post(
282
279
  benchmark_id=self.id,
@@ -285,22 +282,27 @@ class RapidataBenchmark:
285
282
  )
286
283
  )
287
284
 
288
- dataset = RapidataDataset(participant_result.dataset_id, self.__openapi_service)
289
-
290
- try:
291
- dataset.add_datapoints([Datapoint(asset=asset, metadata=metadata) for asset, metadata in zip(assets, prompts_metadata)])
292
- except Exception as e:
293
- logger.warning(f"An error occurred while adding datapoints to the dataset: {e}")
294
- upload_progress = self.__openapi_service.dataset_api.dataset_dataset_id_progress_get(
295
- dataset_id=dataset.id
296
- )
297
- if upload_progress.ready == 0:
298
- raise RuntimeError("None of the media was uploaded successfully. Please check the media paths and try again.")
299
-
300
- logger.warning(f"{upload_progress.failed} datapoints failed to upload. \n{upload_progress.ready} datapoints were uploaded successfully. \nEvaluation will continue with the uploaded datapoints.")
285
+ logger.info(f"Participant created: {participant_result.participant_id}")
301
286
 
302
- self.__openapi_service.benchmark_api.benchmark_benchmark_id_participants_participant_id_submit_post(
303
- benchmark_id=self.id,
287
+ participant = BenchmarkParticipant(name, participant_result.participant_id, self.__openapi_service)
288
+
289
+ successful_uploads, failed_uploads = participant.upload_media(
290
+ assets,
291
+ identifiers,
292
+ )
293
+
294
+ total_uploads = len(assets)
295
+ success_rate = (len(successful_uploads) / total_uploads * 100) if total_uploads > 0 else 0
296
+ logger.info(f"Upload complete: {len(successful_uploads)} successful, {len(failed_uploads)} failed ({success_rate:.1f}% success rate)")
297
+
298
+ if failed_uploads:
299
+ logger.error(f"Failed uploads for media: {[asset.path for asset in failed_uploads]}")
300
+ logger.warning("Some uploads failed. The model evaluation may be incomplete.")
301
+
302
+ if len(successful_uploads) == 0:
303
+ raise RuntimeError("No uploads were successful. The model evaluation will not be completed.")
304
+
305
+ self.__openapi_service.participant_api.participants_participant_id_submit_post(
304
306
  participant_id=participant_result.participant_id
305
307
  )
306
308
 
@@ -8,4 +8,4 @@ from .shuffling_selection import ShufflingSelection
8
8
  from .ab_test_selection import AbTestSelection
9
9
  from .static_selection import StaticSelection
10
10
  from .retrieval_modes import RetrievalMode
11
- from .effort_selection import EffortEstimationSelection
11
+ from .effort_selection import EffortSelection
@@ -3,9 +3,16 @@ from rapidata.api_client.models.effort_capped_selection import EffortCappedSelec
3
3
  from rapidata.rapidata_client.selection.retrieval_modes import RetrievalMode
4
4
 
5
5
 
6
- class EffortEstimationSelection(RapidataSelection):
7
-
6
+ class EffortSelection(RapidataSelection):
7
+ """
8
+ With this selection you can define the effort budget you have for a task.
9
+ As an example, you have a task that takes 10 seconds to complete. The effort budget would be 10.
8
10
 
11
+ Args:
12
+ effort_budget (int): The effort budget for the task.
13
+ retrieval_mode (RetrievalMode): The retrieval mode for the task.
14
+ max_iterations (int | None): The maximum number of iterations for the task.
15
+ """
9
16
  def __init__(self, effort_budget: int, retrieval_mode: RetrievalMode = RetrievalMode.Shuffled, max_iterations: int | None = None):
10
17
  self.effort_budget = effort_budget
11
18
  self.retrieval_mode = retrieval_mode
@@ -10,6 +10,7 @@ from rapidata.api_client.api.rapid_api import RapidApi
10
10
  from rapidata.api_client.api.leaderboard_api import LeaderboardApi
11
11
  from rapidata.api_client.api.validation_set_api import ValidationSetApi
12
12
  from rapidata.api_client.api.workflow_api import WorkflowApi
13
+ from rapidata.api_client.api.participant_api import ParticipantApi
13
14
  from rapidata.api_client.configuration import Configuration
14
15
  from rapidata.service.credential_manager import CredentialManager
15
16
  from rapidata.rapidata_client.api.rapidata_exception import RapidataApiClient
@@ -117,6 +118,10 @@ class OpenAPIService:
117
118
  @property
118
119
  def benchmark_api(self) -> BenchmarkApi:
119
120
  return BenchmarkApi(self.api_client)
121
+
122
+ @property
123
+ def participant_api(self) -> ParticipantApi:
124
+ return ParticipantApi(self.api_client)
120
125
 
121
126
  def _get_rapidata_package_version(self):
122
127
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: rapidata
3
- Version: 2.33.1
3
+ Version: 2.33.2
4
4
  Summary: Rapidata package containing the Rapidata Python Client to interact with the Rapidata Web API in an easy way.
5
5
  License: Apache-2.0
6
6
  Author: Rapidata AG
@@ -1,4 +1,4 @@
1
- rapidata/__init__.py,sha256=nd-IuE9FDLGAl5tk6vhY6k8ZSxu-XK6PiMa9WY2_Buk,907
1
+ rapidata/__init__.py,sha256=w07T5zXe_HttCELcde6bMq0eSbESIQnAIm6zEPLdMg8,897
2
2
  rapidata/api_client/__init__.py,sha256=tNSCpLEs-AyEZGCAYz9MM8gDEpA4KJKcdNL-dcvAAw0,34404
3
3
  rapidata/api_client/api/__init__.py,sha256=qjLeeJSnuPF_ar_nLknjnOqStBQnoCiz-O_rfZUBZrE,1489
4
4
  rapidata/api_client/api/benchmark_api.py,sha256=fr4krx4f3yN--DswD_Prpz-KU81ooG3Lcy-30_KU0dw,129751
@@ -533,13 +533,15 @@ rapidata/api_client/models/workflow_state.py,sha256=5LAK1se76RCoozeVB6oxMPb8p_5b
533
533
  rapidata/api_client/models/zip_entry_file_wrapper.py,sha256=06CoNJD3x511K3rnSmkrwwhc9GbQxwxF-c0ldOyJbAs,4240
534
534
  rapidata/api_client/rest.py,sha256=rtIMcgINZOUaDFaJIinJkXRSddNJmXvMRMfgO2Ezk2o,10835
535
535
  rapidata/api_client_README.md,sha256=sj425Ki-qiO2DCHnJ06r9LjfnGir7UpgXEonMh-LFag,62126
536
- rapidata/rapidata_client/__init__.py,sha256=VXI4s0R3D6qZYveZaP7eliG-YIxmkCIwOzfZTS_MWZc,1235
536
+ rapidata/rapidata_client/__init__.py,sha256=CfkQxCdURXzJsVP6sxKmufze2u-IE_snG_G8NEkE_JM,1225
537
537
  rapidata/rapidata_client/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
538
538
  rapidata/rapidata_client/api/rapidata_exception.py,sha256=BIdmHRrJUGW-Mqhp1H_suemZaR6w9TgjWq-ZW5iUPdQ,3878
539
539
  rapidata/rapidata_client/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
540
540
  rapidata/rapidata_client/benchmark/leaderboard/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
541
541
  rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py,sha256=BDI0xJkTumbZy4dYqkzXy074jC9eaVWoJJDZ84uvatE,3906
542
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py,sha256=CKMkP25_UzuT35ujJuKeZUgwHY5xfZP-BDcnig7wy7c,13634
542
+ rapidata/rapidata_client/benchmark/participant/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
543
+ rapidata/rapidata_client/benchmark/participant/_participant.py,sha256=yN82EWrZXYszsM8Ns0HRMXCTivltkyxcpGRK-cdT01Y,3683
544
+ rapidata/rapidata_client/benchmark/rapidata_benchmark.py,sha256=03MXV3FWSWfm2iyOifXt-43wruzIPQenNBPmGZGKdds,13328
543
545
  rapidata/rapidata_client/benchmark/rapidata_benchmark_manager.py,sha256=_0ot7zRj1de5admMO7NZ7qVSCkwTYu6xOZI5dUkITuI,4592
544
546
  rapidata/rapidata_client/country_codes/__init__.py,sha256=FB9Dcks44J6C6YBSYmTmNZ71tE130x6NO_3aLJ8fKzQ,40
545
547
  rapidata/rapidata_client/country_codes/country_codes.py,sha256=ePHqeb7y9DWQZAnddBzPx1puYBcrgUjdR2sbFijuFD8,283
@@ -596,13 +598,13 @@ rapidata/rapidata_client/referee/__init__.py,sha256=q0Hv9nmfEpyChejtyMLT8hWKL0vT
596
598
  rapidata/rapidata_client/referee/_base_referee.py,sha256=MdFOhdxt3sRnWXLDKLJZKFdVpjBGn9jypPnWWQ6msQA,496
597
599
  rapidata/rapidata_client/referee/_early_stopping_referee.py,sha256=ULbokQZ91wc9D_20qHUhe55D28D9eTY1J1cMp_-oIDc,2088
598
600
  rapidata/rapidata_client/referee/_naive_referee.py,sha256=PVR8uy8hfRjr2DBzdOFyvou6S3swNc-4UvgjhO-09TU,1209
599
- rapidata/rapidata_client/selection/__init__.py,sha256=vC2XbykShj_VW1uz5IZfQQXjgeIzzdYqC3n0K2c8cIs,574
601
+ rapidata/rapidata_client/selection/__init__.py,sha256=1QsMUieM-oleyOXX2mOhP36_P4h1foIIr3rW8WT8Eg0,564
600
602
  rapidata/rapidata_client/selection/_base_selection.py,sha256=tInbWOgxT_4CHkr5QHoG55ZcUi1ZmfcEGIwLKKCnN20,147
601
603
  rapidata/rapidata_client/selection/ab_test_selection.py,sha256=fymubkVMawqJmYp9FKzWXTki9tgBgoj3cOP8rG9oOd0,1284
602
604
  rapidata/rapidata_client/selection/capped_selection.py,sha256=iWhbM1LcayhgFm7oKADXCaKHGdiQIupI0jbYuuEVM2A,1184
603
605
  rapidata/rapidata_client/selection/conditional_validation_selection.py,sha256=OcNYSBi19vIcy2bLDmj9cv-gg5LFSvdjc3tooV0Z7Oc,2842
604
606
  rapidata/rapidata_client/selection/demographic_selection.py,sha256=l4vnNbzlf9ED6BKqN4k5cZXShkXu9L1C5DtO78Vwr5M,1454
605
- rapidata/rapidata_client/selection/effort_selection.py,sha256=uS8ctK2o-40Blu02jB5w7i8WtRSw21LhXszkkq30pM8,858
607
+ rapidata/rapidata_client/selection/effort_selection.py,sha256=1p4CtwVJIyf4HZ-mPn0ohloe9dBxJFLhStG6jQNuxnE,1266
606
608
  rapidata/rapidata_client/selection/labeling_selection.py,sha256=0X8DJHgwvgwekEbzVxWPyzZ1QAPcULZNDjfLQYUlcLM,1348
607
609
  rapidata/rapidata_client/selection/rapidata_selections.py,sha256=lgwRivdzSnCri3K-Z-ngqR5RXwTl7iYuKTRpuyl5UMY,1853
608
610
  rapidata/rapidata_client/selection/retrieval_modes.py,sha256=J2jzPEJ4wdllm_RnU_FYPh3eO3xeZS7QUk-NXgTB2u4,668
@@ -642,8 +644,8 @@ rapidata/rapidata_client/workflow/_timestamp_workflow.py,sha256=tPi2zu1-SlE_ppbG
642
644
  rapidata/service/__init__.py,sha256=s9bS1AJZaWIhLtJX_ZA40_CK39rAAkwdAmymTMbeWl4,68
643
645
  rapidata/service/credential_manager.py,sha256=pUEEtp6VrFWYhfUUtyqmS0AlRqe2Y0kFkY6o22IT4KM,8682
644
646
  rapidata/service/local_file_service.py,sha256=pgorvlWcx52Uh3cEG6VrdMK_t__7dacQ_5AnfY14BW8,877
645
- rapidata/service/openapi_service.py,sha256=xoGBACpUhG0H-tadSBa8A91LHyfI7n-FCT2JlrERqco,5221
646
- rapidata-2.33.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
647
- rapidata-2.33.1.dist-info/METADATA,sha256=lDYoDN6gtzloaI4-8X_ifYr2RCCrHt_7LeyCi6RM_iE,1264
648
- rapidata-2.33.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
649
- rapidata-2.33.1.dist-info/RECORD,,
647
+ rapidata/service/openapi_service.py,sha256=v2fhPbHmD0J11ZRZY6f80PdIdGwpRFlbfMH9t8Ypg5A,5403
648
+ rapidata-2.33.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
649
+ rapidata-2.33.2.dist-info/METADATA,sha256=YkByWAPnAWFN9E9m-9KbNDsgj9kV2kXuQcqA7raN2N0,1264
650
+ rapidata-2.33.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
651
+ rapidata-2.33.2.dist-info/RECORD,,