rapidata 2.41.3__py3-none-any.whl → 2.42.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rapidata might be problematic. Click here for more details.

Files changed (74)
  1. rapidata/__init__.py +1 -5
  2. rapidata/api_client/__init__.py +14 -14
  3. rapidata/api_client/api/__init__.py +1 -0
  4. rapidata/api_client/api/asset_api.py +851 -0
  5. rapidata/api_client/api/benchmark_api.py +298 -0
  6. rapidata/api_client/api/customer_rapid_api.py +29 -43
  7. rapidata/api_client/api/dataset_api.py +163 -1143
  8. rapidata/api_client/api/participant_api.py +28 -74
  9. rapidata/api_client/api/validation_set_api.py +283 -0
  10. rapidata/api_client/models/__init__.py +13 -14
  11. rapidata/api_client/models/add_validation_rapid_model.py +3 -3
  12. rapidata/api_client/models/add_validation_rapid_new_model.py +152 -0
  13. rapidata/api_client/models/add_validation_rapid_new_model_asset.py +182 -0
  14. rapidata/api_client/models/compare_workflow_model.py +3 -3
  15. rapidata/api_client/models/create_datapoint_from_files_model.py +3 -3
  16. rapidata/api_client/models/create_datapoint_from_text_sources_model.py +3 -3
  17. rapidata/api_client/models/create_datapoint_from_urls_model.py +3 -3
  18. rapidata/api_client/models/create_datapoint_model.py +108 -0
  19. rapidata/api_client/models/create_datapoint_model_asset.py +182 -0
  20. rapidata/api_client/models/create_demographic_rapid_model.py +13 -2
  21. rapidata/api_client/models/create_demographic_rapid_model_asset.py +188 -0
  22. rapidata/api_client/models/create_demographic_rapid_model_new.py +119 -0
  23. rapidata/api_client/models/create_sample_model.py +8 -2
  24. rapidata/api_client/models/create_sample_model_asset.py +182 -0
  25. rapidata/api_client/models/create_sample_model_obsolete.py +87 -0
  26. rapidata/api_client/models/file_asset_input_file.py +8 -22
  27. rapidata/api_client/models/fork_benchmark_result.py +87 -0
  28. rapidata/api_client/models/form_file_wrapper.py +17 -2
  29. rapidata/api_client/models/get_asset_metadata_result.py +100 -0
  30. rapidata/api_client/models/multi_asset_input_assets_inner.py +10 -24
  31. rapidata/api_client/models/prompt_asset_metadata_input.py +3 -3
  32. rapidata/api_client/models/proxy_file_wrapper.py +17 -2
  33. rapidata/api_client/models/stream_file_wrapper.py +25 -3
  34. rapidata/api_client/models/submit_prompt_model.py +3 -3
  35. rapidata/api_client/models/text_metadata.py +6 -1
  36. rapidata/api_client/models/text_metadata_model.py +7 -2
  37. rapidata/api_client/models/upload_file_from_url_result.py +87 -0
  38. rapidata/api_client/models/upload_file_result.py +87 -0
  39. rapidata/api_client/models/zip_entry_file_wrapper.py +33 -2
  40. rapidata/api_client_README.md +28 -25
  41. rapidata/rapidata_client/__init__.py +0 -1
  42. rapidata/rapidata_client/benchmark/participant/_participant.py +25 -24
  43. rapidata/rapidata_client/benchmark/rapidata_benchmark.py +89 -102
  44. rapidata/rapidata_client/datapoints/__init__.py +0 -1
  45. rapidata/rapidata_client/datapoints/_asset_uploader.py +71 -0
  46. rapidata/rapidata_client/datapoints/_datapoint.py +58 -171
  47. rapidata/rapidata_client/datapoints/_datapoint_uploader.py +95 -0
  48. rapidata/rapidata_client/datapoints/assets/__init__.py +0 -11
  49. rapidata/rapidata_client/datapoints/metadata/_media_asset_metadata.py +10 -7
  50. rapidata/rapidata_client/demographic/demographic_manager.py +21 -8
  51. rapidata/rapidata_client/exceptions/failed_upload_exception.py +0 -62
  52. rapidata/rapidata_client/order/_rapidata_order_builder.py +0 -10
  53. rapidata/rapidata_client/order/dataset/_rapidata_dataset.py +65 -187
  54. rapidata/rapidata_client/order/rapidata_order_manager.py +62 -124
  55. rapidata/rapidata_client/validation/rapidata_validation_set.py +9 -5
  56. rapidata/rapidata_client/validation/rapids/_validation_rapid_uploader.py +101 -0
  57. rapidata/rapidata_client/validation/rapids/box.py +35 -11
  58. rapidata/rapidata_client/validation/rapids/rapids.py +26 -128
  59. rapidata/rapidata_client/validation/rapids/rapids_manager.py +123 -104
  60. rapidata/rapidata_client/validation/validation_set_manager.py +41 -38
  61. rapidata/rapidata_client/workflow/_ranking_workflow.py +14 -17
  62. rapidata/rapidata_client/workflow/_select_words_workflow.py +3 -16
  63. rapidata/service/openapi_service.py +8 -3
  64. {rapidata-2.41.3.dist-info → rapidata-2.42.1.dist-info}/METADATA +1 -1
  65. {rapidata-2.41.3.dist-info → rapidata-2.42.1.dist-info}/RECORD +67 -58
  66. {rapidata-2.41.3.dist-info → rapidata-2.42.1.dist-info}/WHEEL +1 -1
  67. rapidata/rapidata_client/datapoints/assets/_base_asset.py +0 -13
  68. rapidata/rapidata_client/datapoints/assets/_media_asset.py +0 -318
  69. rapidata/rapidata_client/datapoints/assets/_multi_asset.py +0 -61
  70. rapidata/rapidata_client/datapoints/assets/_sessions.py +0 -40
  71. rapidata/rapidata_client/datapoints/assets/_text_asset.py +0 -34
  72. rapidata/rapidata_client/datapoints/assets/data_type_enum.py +0 -8
  73. rapidata/rapidata_client/order/dataset/_progress_tracker.py +0 -100
  74. {rapidata-2.41.3.dist-info → rapidata-2.42.1.dist-info}/licenses/LICENSE +0 -0
@@ -47,7 +47,7 @@ class ValidationSetManager:
47
47
 
48
48
  def __init__(self, openapi_service: OpenAPIService) -> None:
49
49
  self.__openapi_service = openapi_service
50
- self.rapid = RapidsManager()
50
+ self.rapid = RapidsManager(openapi_service)
51
51
  logger.debug("ValidationSetManager initialized")
52
52
 
53
53
  def _create_order_validation_set(
@@ -66,11 +66,13 @@ class ValidationSetManager:
66
66
  Rapid(
67
67
  asset=datapoint.asset,
68
68
  payload=workflow._to_payload(datapoint),
69
- metadata=datapoint.metadata,
69
+ context=datapoint.context,
70
+ media_context=datapoint.media_context,
71
+ data_type=datapoint.data_type,
70
72
  settings=settings,
71
73
  )
72
74
  )
73
- return self._submit(name=order_name, rapids=rapids, dimensions=None)
75
+ return self._submit(name=order_name, rapids=rapids, dimensions=[])
74
76
 
75
77
  def create_classification_set(
76
78
  self,
@@ -143,11 +145,6 @@ class ValidationSetManager:
143
145
  logger.debug("Creating classification rapids")
144
146
  rapids: list[Rapid] = []
145
147
  for i in range(len(datapoints)):
146
- rapid_metadata = []
147
- if contexts:
148
- rapid_metadata.append(PromptMetadata(contexts[i]))
149
- if media_contexts:
150
- rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
151
148
  rapids.append(
152
149
  self.rapid.classification_rapid(
153
150
  instruction=instruction,
@@ -155,7 +152,10 @@ class ValidationSetManager:
155
152
  datapoint=datapoints[i],
156
153
  truths=truths[i],
157
154
  data_type=data_type,
158
- metadata=rapid_metadata,
155
+ context=contexts[i] if contexts != None else None,
156
+ media_context=(
157
+ media_contexts[i] if media_contexts != None else None
158
+ ),
159
159
  explanation=explanations[i] if explanations != None else None,
160
160
  )
161
161
  )
@@ -231,18 +231,16 @@ class ValidationSetManager:
231
231
  logger.debug("Creating comparison rapids")
232
232
  rapids: list[Rapid] = []
233
233
  for i in range(len(datapoints)):
234
- rapid_metadata = []
235
- if contexts:
236
- rapid_metadata.append(PromptMetadata(contexts[i]))
237
- if media_contexts:
238
- rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
239
234
  rapids.append(
240
235
  self.rapid.compare_rapid(
241
236
  instruction=instruction,
242
237
  truth=truths[i],
243
238
  datapoint=datapoints[i],
244
239
  data_type=data_type,
245
- metadata=rapid_metadata,
240
+ context=contexts[i] if contexts != None else None,
241
+ media_context=(
242
+ media_contexts[i] if media_contexts != None else None
243
+ ),
246
244
  explanation=explanation[i] if explanation != None else None,
247
245
  )
248
246
  )
@@ -387,17 +385,15 @@ class ValidationSetManager:
387
385
  rapids = []
388
386
  rapids: list[Rapid] = []
389
387
  for i in range(len(datapoints)):
390
- rapid_metadata = []
391
- if contexts:
392
- rapid_metadata.append(PromptMetadata(contexts[i]))
393
- if media_contexts:
394
- rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
395
388
  rapids.append(
396
389
  self.rapid.locate_rapid(
397
390
  instruction=instruction,
398
391
  truths=truths[i],
399
392
  datapoint=datapoints[i],
400
- metadata=rapid_metadata,
393
+ context=contexts[i] if contexts != None else None,
394
+ media_context=(
395
+ media_contexts[i] if media_contexts != None else None
396
+ ),
401
397
  explanation=explanation[i] if explanation != None else None,
402
398
  )
403
399
  )
@@ -466,17 +462,15 @@ class ValidationSetManager:
466
462
  logger.debug("Creating draw rapids")
467
463
  rapids: list[Rapid] = []
468
464
  for i in range(len(datapoints)):
469
- rapid_metadata = []
470
- if contexts:
471
- rapid_metadata.append(PromptMetadata(contexts[i]))
472
- if media_contexts:
473
- rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
474
465
  rapids.append(
475
466
  self.rapid.draw_rapid(
476
467
  instruction=instruction,
477
468
  truths=truths[i],
478
469
  datapoint=datapoints[i],
479
- metadata=rapid_metadata,
470
+ context=contexts[i] if contexts != None else None,
471
+ media_context=(
472
+ media_contexts[i] if media_contexts != None else None
473
+ ),
480
474
  explanation=explanation[i] if explanation != None else None,
481
475
  )
482
476
  )
@@ -546,17 +540,15 @@ class ValidationSetManager:
546
540
  logger.debug("Creating timestamp rapids")
547
541
  rapids: list[Rapid] = []
548
542
  for i in range(len(datapoints)):
549
- rapid_metadata = []
550
- if contexts:
551
- rapid_metadata.append(PromptMetadata(contexts[i]))
552
- if media_contexts:
553
- rapid_metadata.append(MediaAssetMetadata(media_contexts[i]))
554
543
  rapids.append(
555
544
  self.rapid.timestamp_rapid(
556
545
  instruction=instruction,
557
546
  truths=truths[i],
558
547
  datapoint=datapoints[i],
559
- metadata=rapid_metadata,
548
+ context=contexts[i] if contexts != None else None,
549
+ media_context=(
550
+ media_contexts[i] if media_contexts != None else None
551
+ ),
560
552
  explanation=explanation[i] if explanation != None else None,
561
553
  )
562
554
  )
@@ -587,7 +579,7 @@ class ValidationSetManager:
587
579
  self,
588
580
  name: str,
589
581
  rapids: list[Rapid],
590
- dimensions: list[str] | None,
582
+ dimensions: list[str],
591
583
  ) -> RapidataValidationSet:
592
584
  logger.debug("Creating validation set")
593
585
  validation_set_id = (
@@ -611,16 +603,27 @@ class ValidationSetManager:
611
603
  with tracer.start_as_current_span("Adding rapids to validation set"):
612
604
  logger.debug("Adding rapids to validation set")
613
605
  failed_rapids = []
614
- for rapid in tqdm(
615
- rapids,
606
+
607
+ progress_bar = tqdm(
608
+ total=len(rapids),
616
609
  desc="Uploading validation tasks",
617
610
  disable=rapidata_config.logging.silent_mode,
618
- ):
611
+ )
612
+
613
+ for rapid in rapids:
619
614
  try:
620
615
  validation_set.add_rapid(rapid)
621
- except Exception:
616
+ progress_bar.update(1)
617
+ except Exception as e:
618
+ logger.error(
619
+ "Failed to add rapid %s to validation set.\nError: %s",
620
+ rapid.asset,
621
+ str(e),
622
+ )
622
623
  failed_rapids.append(rapid.asset)
623
624
 
625
+ progress_bar.close()
626
+
624
627
  if failed_rapids:
625
628
  logger.error(
626
629
  "Failed to add %s datapoints to validation set: %s",
@@ -5,13 +5,13 @@ from rapidata.api_client import (
5
5
  )
6
6
  from rapidata.api_client.models.compare_workflow_model import CompareWorkflowModel
7
7
  from rapidata.rapidata_client.workflow._base_workflow import Workflow
8
- from rapidata.rapidata_client.datapoints.metadata import PromptMetadata
9
- from rapidata.api_client.models.dataset_dataset_id_datapoints_post_request_metadata_inner import (
10
- DatasetDatasetIdDatapointsPostRequestMetadataInner,
11
- )
12
8
  from rapidata.api_client import ComparePayload
13
9
  from rapidata.rapidata_client.datapoints._datapoint import Datapoint
14
10
  from rapidata.api_client.models.rapid_modality import RapidModality
11
+ from rapidata.rapidata_client.datapoints.metadata import Metadata
12
+ from rapidata.api_client.models.create_datapoint_from_files_model_metadata_inner import (
13
+ CreateDatapointFromFilesModelMetadataInner,
14
+ )
15
15
 
16
16
 
17
17
  class RankingWorkflow(Workflow):
@@ -25,19 +25,11 @@ class RankingWorkflow(Workflow):
25
25
  elo_start: int = 1200,
26
26
  elo_k_factor: int = 40,
27
27
  elo_scaling_factor: int = 400,
28
- context: str | None = None,
28
+ metadatas: list[Metadata] = [],
29
29
  ):
30
30
  super().__init__(type="CompareWorkflowConfig")
31
31
 
32
- self.context = (
33
- [
34
- DatasetDatasetIdDatapointsPostRequestMetadataInner(
35
- PromptMetadata(context).to_model()
36
- )
37
- ]
38
- if context
39
- else None
40
- )
32
+ self.metadatas = metadatas
41
33
 
42
34
  self.criteria = criteria
43
35
  self.total_comparison_budget = total_comparison_budget
@@ -67,7 +59,10 @@ class RankingWorkflow(Workflow):
67
59
  criteria=self.criteria,
68
60
  eloConfig=self.elo_config,
69
61
  pairMakerConfig=self.pair_maker_config,
70
- metadata=self.context,
62
+ metadata=[
63
+ CreateDatapointFromFilesModelMetadataInner(metadata.to_model())
64
+ for metadata in self.metadatas
65
+ ],
71
66
  )
72
67
 
73
68
  def _to_payload(self, datapoint: Datapoint) -> ComparePayload:
@@ -77,7 +72,9 @@ class RankingWorkflow(Workflow):
77
72
  )
78
73
 
79
74
  def __str__(self) -> str:
80
- return f"RankingWorkflow(criteria='{self.criteria}', context={self.context})"
75
+ return (
76
+ f"RankingWorkflow(criteria='{self.criteria}', metadatas={self.metadatas})"
77
+ )
81
78
 
82
79
  def __repr__(self) -> str:
83
- return f"RankingWorkflow(criteria={self.criteria!r}, total_comparison_budget={self.total_comparison_budget!r}, random_comparisons_ratio={self.random_comparisons_ratio!r}, elo_start={self.elo_start!r}, elo_k_factor={self.elo_k_factor!r}, elo_scaling_factor={self.elo_scaling_factor!r}, context={self.context!r})"
80
+ return f"RankingWorkflow(criteria={self.criteria!r}, total_comparison_budget={self.total_comparison_budget!r}, random_comparisons_ratio={self.random_comparisons_ratio!r}, elo_start={self.elo_start!r}, elo_k_factor={self.elo_k_factor!r}, elo_scaling_factor={self.elo_scaling_factor!r}, metadatas={self.metadatas!r})"
@@ -8,9 +8,6 @@ from rapidata.api_client.models.transcription_rapid_blueprint import (
8
8
  from rapidata.rapidata_client.workflow._base_workflow import Workflow
9
9
  from rapidata.api_client import TranscriptionPayload, TranscriptionWord
10
10
  from rapidata.rapidata_client.datapoints._datapoint import Datapoint
11
- from rapidata.rapidata_client.datapoints.metadata._select_words_metadata import (
12
- SelectWordsMetadata,
13
- )
14
11
  from rapidata.api_client.models.rapid_modality import RapidModality
15
12
 
16
13
 
@@ -46,25 +43,15 @@ class SelectWordsWorkflow(Workflow):
46
43
 
47
44
  def _to_payload(self, datapoint: Datapoint) -> TranscriptionPayload:
48
45
  assert (
49
- datapoint.metadata is not None
50
- ), "SelectWordsWorkflow requires a metadata datapoint"
51
-
52
- assert any(
53
- isinstance(metadata, SelectWordsMetadata) for metadata in datapoint.metadata
54
- ), "SelectWordsWorkflow requires a SelectWordsMetadata datapoint"
55
-
56
- select_words_metadata = next(
57
- metadata
58
- for metadata in datapoint.metadata
59
- if isinstance(metadata, SelectWordsMetadata)
60
- )
46
+ datapoint.sentence is not None
47
+ ), "SelectWordsWorkflow requires a sentence datapoint"
61
48
 
62
49
  return TranscriptionPayload(
63
50
  _t="TranscriptionPayload",
64
51
  title=self._instruction,
65
52
  transcription=[
66
53
  TranscriptionWord(word=word, wordIndex=i)
67
- for i, word in enumerate(select_words_metadata.select_words.split())
54
+ for i, word in enumerate(datapoint.sentence.split())
68
55
  ],
69
56
  )
70
57
 
@@ -1,12 +1,13 @@
1
1
  import subprocess
2
2
  from importlib.metadata import version, PackageNotFoundError
3
3
 
4
+ from rapidata.api_client import CustomerRapidApi
4
5
  from rapidata.api_client.api.campaign_api import CampaignApi
6
+ from rapidata.api_client.api.asset_api import AssetApi
5
7
  from rapidata.api_client.api.dataset_api import DatasetApi
6
8
  from rapidata.api_client.api.benchmark_api import BenchmarkApi
7
9
  from rapidata.api_client.api.order_api import OrderApi
8
10
  from rapidata.api_client.api.pipeline_api import PipelineApi
9
- from rapidata.api_client.api.rapid_api import RapidApi
10
11
  from rapidata.api_client.api.leaderboard_api import LeaderboardApi
11
12
  from rapidata.api_client.api.validation_set_api import ValidationSetApi
12
13
  from rapidata.api_client.api.workflow_api import WorkflowApi
@@ -94,6 +95,10 @@ class OpenAPIService:
94
95
  def order_api(self) -> OrderApi:
95
96
  return OrderApi(self.api_client)
96
97
 
98
+ @property
99
+ def asset_api(self) -> AssetApi:
100
+ return AssetApi(self.api_client)
101
+
97
102
  @property
98
103
  def dataset_api(self) -> DatasetApi:
99
104
  return DatasetApi(self.api_client)
@@ -103,8 +108,8 @@ class OpenAPIService:
103
108
  return ValidationSetApi(self.api_client)
104
109
 
105
110
  @property
106
- def rapid_api(self) -> RapidApi:
107
- return RapidApi(self.api_client)
111
+ def rapid_api(self) -> CustomerRapidApi:
112
+ return CustomerRapidApi(self.api_client)
108
113
 
109
114
  @property
110
115
  def campaign_api(self) -> CampaignApi:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rapidata
3
- Version: 2.41.3
3
+ Version: 2.42.1
4
4
  Summary: Rapidata package containing the Rapidata Python Client to interact with the Rapidata Web API in an easy way.
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE