rapidata 2.37.0__py3-none-any.whl → 2.38.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rapidata might be problematic. Click here for more details.

Files changed (65)
  1. rapidata/__init__.py +3 -4
  2. rapidata/rapidata_client/__init__.py +1 -4
  3. rapidata/rapidata_client/api/{rapidata_exception.py → rapidata_api_client.py} +119 -2
  4. rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py +88 -46
  5. rapidata/rapidata_client/benchmark/participant/_participant.py +26 -9
  6. rapidata/rapidata_client/benchmark/rapidata_benchmark.py +274 -205
  7. rapidata/rapidata_client/benchmark/rapidata_benchmark_manager.py +98 -76
  8. rapidata/rapidata_client/config/__init__.py +3 -0
  9. rapidata/rapidata_client/config/logger.py +135 -0
  10. rapidata/rapidata_client/config/logging_config.py +58 -0
  11. rapidata/rapidata_client/config/managed_print.py +6 -0
  12. rapidata/rapidata_client/config/order_config.py +14 -0
  13. rapidata/rapidata_client/config/rapidata_config.py +14 -9
  14. rapidata/rapidata_client/config/tracer.py +130 -0
  15. rapidata/rapidata_client/config/upload_config.py +14 -0
  16. rapidata/rapidata_client/datapoints/_datapoint.py +1 -1
  17. rapidata/rapidata_client/datapoints/assets/_media_asset.py +1 -1
  18. rapidata/rapidata_client/datapoints/assets/_sessions.py +2 -2
  19. rapidata/rapidata_client/demographic/demographic_manager.py +16 -14
  20. rapidata/rapidata_client/filter/_base_filter.py +11 -5
  21. rapidata/rapidata_client/filter/age_filter.py +9 -3
  22. rapidata/rapidata_client/filter/and_filter.py +20 -5
  23. rapidata/rapidata_client/filter/campaign_filter.py +7 -1
  24. rapidata/rapidata_client/filter/country_filter.py +8 -2
  25. rapidata/rapidata_client/filter/custom_filter.py +9 -3
  26. rapidata/rapidata_client/filter/gender_filter.py +9 -3
  27. rapidata/rapidata_client/filter/language_filter.py +12 -5
  28. rapidata/rapidata_client/filter/new_user_filter.py +3 -4
  29. rapidata/rapidata_client/filter/not_filter.py +17 -5
  30. rapidata/rapidata_client/filter/or_filter.py +20 -5
  31. rapidata/rapidata_client/filter/response_count_filter.py +6 -0
  32. rapidata/rapidata_client/filter/user_score_filter.py +17 -5
  33. rapidata/rapidata_client/order/_rapidata_dataset.py +45 -17
  34. rapidata/rapidata_client/order/_rapidata_order_builder.py +19 -13
  35. rapidata/rapidata_client/order/rapidata_order.py +60 -48
  36. rapidata/rapidata_client/order/rapidata_order_manager.py +231 -197
  37. rapidata/rapidata_client/order/rapidata_results.py +71 -57
  38. rapidata/rapidata_client/rapidata_client.py +36 -23
  39. rapidata/rapidata_client/selection/_base_selection.py +6 -0
  40. rapidata/rapidata_client/selection/static_selection.py +5 -10
  41. rapidata/rapidata_client/settings/_rapidata_setting.py +8 -0
  42. rapidata/rapidata_client/settings/alert_on_fast_response.py +8 -5
  43. rapidata/rapidata_client/settings/free_text_minimum_characters.py +9 -4
  44. rapidata/rapidata_client/validation/rapidata_validation_set.py +20 -16
  45. rapidata/rapidata_client/validation/rapids/rapids.py +7 -1
  46. rapidata/rapidata_client/validation/validation_set_manager.py +285 -268
  47. rapidata/rapidata_client/workflow/_base_workflow.py +6 -1
  48. rapidata/rapidata_client/workflow/_classify_workflow.py +6 -0
  49. rapidata/rapidata_client/workflow/_compare_workflow.py +6 -0
  50. rapidata/rapidata_client/workflow/_draw_workflow.py +6 -0
  51. rapidata/rapidata_client/workflow/_evaluation_workflow.py +6 -0
  52. rapidata/rapidata_client/workflow/_free_text_workflow.py +6 -0
  53. rapidata/rapidata_client/workflow/_locate_workflow.py +6 -0
  54. rapidata/rapidata_client/workflow/_ranking_workflow.py +12 -0
  55. rapidata/rapidata_client/workflow/_select_words_workflow.py +6 -0
  56. rapidata/rapidata_client/workflow/_timestamp_workflow.py +6 -0
  57. rapidata/service/credential_manager.py +1 -1
  58. rapidata/service/openapi_service.py +2 -2
  59. {rapidata-2.37.0.dist-info → rapidata-2.38.0.dist-info}/METADATA +4 -1
  60. {rapidata-2.37.0.dist-info → rapidata-2.38.0.dist-info}/RECORD +62 -59
  61. rapidata/rapidata_client/logging/__init__.py +0 -2
  62. rapidata/rapidata_client/logging/logger.py +0 -122
  63. rapidata/rapidata_client/logging/output_manager.py +0 -20
  64. {rapidata-2.37.0.dist-info → rapidata-2.38.0.dist-info}/LICENSE +0 -0
  65. {rapidata-2.37.0.dist-info → rapidata-2.38.0.dist-info}/WHEEL +0 -0
rapidata/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "2.37.0"
1
+ __version__ = "2.38.0"
2
2
 
3
3
  from .rapidata_client import (
4
4
  RapidataClient,
@@ -38,9 +38,8 @@ from .rapidata_client import (
38
38
  Box,
39
39
  Datapoint,
40
40
  PromptMetadata,
41
- logger,
42
- configure_logger,
43
- RapidataOutputManager,
44
41
  FailedUploadException,
45
42
  rapidata_config,
43
+ logger,
44
+ managed_print,
46
45
  )
@@ -43,9 +43,6 @@ from .filter import (
43
43
  OrFilter,
44
44
  ResponseCountFilter,
45
45
  )
46
-
47
- from .logging import configure_logger, logger, RapidataOutputManager
48
-
49
46
  from .validation import Box
50
47
  from .exceptions import FailedUploadException
51
- from .config import rapidata_config
48
+ from .config import rapidata_config, logger, managed_print
@@ -1,10 +1,19 @@
1
1
  from typing import Optional, Any
2
- from rapidata.api_client.api_client import ApiClient, rest, ApiResponse, ApiResponseT
2
+ from rapidata.api_client.api_client import (
3
+ ApiClient,
4
+ rest,
5
+ ApiResponse,
6
+ ApiResponseT,
7
+ )
3
8
  from rapidata.api_client.exceptions import ApiException
4
9
  import json
5
10
  import threading
6
11
  from contextlib import contextmanager
7
- from rapidata.rapidata_client.logging import logger
12
+ from rapidata.rapidata_client.config import logger, tracer
13
+ from opentelemetry import trace
14
+ from opentelemetry.trace import format_trace_id, format_span_id, Link, SpanContext
15
+ from opentelemetry.sdk.trace.id_generator import RandomIdGenerator
16
+
8
17
 
9
18
  # Thread-local storage for controlling error logging
10
19
  _thread_local = threading.local()
@@ -91,6 +100,114 @@ class RapidataError(Exception):
91
100
  class RapidataApiClient(ApiClient):
92
101
  """Custom API client that wraps errors in RapidataError."""
93
102
 
103
+ def __init__(self, *args, **kwargs):
104
+ super().__init__(*args, **kwargs)
105
+ self.id_generator = RandomIdGenerator()
106
+
107
+ def call_api(
108
+ self,
109
+ method,
110
+ url,
111
+ header_params=None,
112
+ body=None,
113
+ post_params=None,
114
+ _request_timeout=None,
115
+ ) -> rest.RESTResponse:
116
+ # Get the current span from OpenTelemetry
117
+ current_span = trace.get_current_span()
118
+
119
+ # Initialize header_params if it's None
120
+ if header_params is None:
121
+ header_params = {}
122
+
123
+ # Add tracing headers if we have a valid span
124
+ if not current_span.is_recording():
125
+ return super().call_api(
126
+ method,
127
+ url,
128
+ header_params,
129
+ body,
130
+ post_params,
131
+ _request_timeout,
132
+ )
133
+
134
+ current_span_context = current_span.get_span_context()
135
+
136
+ # Generate a new trace ID for backend communication
137
+ # This separates the backend trace from the SDK trace
138
+ backend_trace_id = self.id_generator.generate_trace_id()
139
+ backend_span_id = self.id_generator.generate_span_id()
140
+
141
+ # Create a new span context for the backend trace
142
+ backend_span_context = SpanContext(
143
+ trace_id=backend_trace_id,
144
+ span_id=backend_span_id,
145
+ is_remote=True,
146
+ trace_flags=current_span_context.trace_flags,
147
+ )
148
+
149
+ # Create a link from current SDK span to the backend trace
150
+ link_to_backend = Link(backend_span_context)
151
+
152
+ # Create a link from backend trace back to the original SDK span
153
+ link_back_to_sdk = Link(current_span_context)
154
+
155
+ # Create a span in the current SDK trace that links to the backend
156
+ with tracer.start_span(
157
+ f"sdk_request_{method}_{url.replace('/', '_')}",
158
+ links=[link_to_backend],
159
+ ) as sdk_request_span:
160
+ # Set attributes on the SDK span
161
+ sdk_request_span.set_attribute("http.method", method)
162
+ sdk_request_span.set_attribute("http.target", url)
163
+ sdk_request_span.set_attribute(
164
+ "rapidata.backend_trace_id", format_trace_id(backend_trace_id)
165
+ )
166
+ sdk_request_span.set_attribute(
167
+ "rapidata.original_trace_id",
168
+ format_trace_id(current_span_context.trace_id),
169
+ )
170
+
171
+ # Now create the initial span for the backend trace that will be sent
172
+ # This span will be the starting point for the backend trace
173
+ with tracer.start_span(
174
+ f"backend_trace_start_{method}_{url.replace('/', '_')}",
175
+ context=trace.set_span_in_context(
176
+ trace.NonRecordingSpan(backend_span_context)
177
+ ),
178
+ links=[link_back_to_sdk],
179
+ ) as backend_initial_span:
180
+ # Set attributes on the backend initial span
181
+ backend_initial_span.set_attribute("http.method", method)
182
+ backend_initial_span.set_attribute("http.target", url)
183
+ backend_initial_span.set_attribute(
184
+ "rapidata.trace_type", "backend_start"
185
+ )
186
+ backend_initial_span.set_attribute(
187
+ "rapidata.sdk_trace_id",
188
+ format_trace_id(current_span_context.trace_id),
189
+ )
190
+
191
+ # Format the traceparent header with the backend trace ID
192
+ # The backend will receive this and continue the trace
193
+ header_params["traceparent"] = (
194
+ "00-"
195
+ + format_trace_id(backend_trace_id)
196
+ + "-"
197
+ + format_span_id(backend_span_id)
198
+ + "-"
199
+ + f"{backend_span_context.trace_flags:02x}"
200
+ )
201
+
202
+ return super().call_api(
203
+ method,
204
+ url,
205
+ header_params,
206
+ body,
207
+ post_params,
208
+ _request_timeout,
209
+ )
210
+
94
211
  def response_deserialize(
95
212
  self,
96
213
  response_data: rest.RESTResponse,
@@ -1,12 +1,13 @@
1
+ import urllib.parse
2
+ import webbrowser
3
+ from colorama import Fore
1
4
  import pandas as pd
2
5
  from typing import Literal, Optional
3
6
 
4
- from rapidata.rapidata_client.logging import logger
7
+ from rapidata.rapidata_client.config import logger, managed_print, tracer
5
8
  from rapidata.rapidata_client.benchmark._detail_mapper import DetailMapper
6
9
  from rapidata.service.openapi_service import OpenAPIService
7
- from rapidata.api_client.models.update_leaderboard_response_config_model import (
8
- UpdateLeaderboardResponseConfigModel,
9
- )
10
+ from rapidata.api_client.models.update_leaderboard_model import UpdateLeaderboardModel
10
11
 
11
12
 
12
13
  class RapidataLeaderboard:
@@ -32,6 +33,7 @@ class RapidataLeaderboard:
32
33
  inverse_ranking: bool,
33
34
  response_budget: int,
34
35
  min_responses_per_matchup: int,
36
+ benchmark_id: str,
35
37
  id: str,
36
38
  openapi_service: OpenAPIService,
37
39
  ):
@@ -43,7 +45,9 @@ class RapidataLeaderboard:
43
45
  self.__inverse_ranking = inverse_ranking
44
46
  self.__response_budget = response_budget
45
47
  self.__min_responses_per_matchup = min_responses_per_matchup
48
+ self.__benchmark_id = benchmark_id
46
49
  self.id = id
50
+ self.__leaderboard_page = f"https://app.{self.__openapi_service.environment}/mri/benchmarks/{self.__benchmark_id}/leaderboard/{self.id}"
47
51
 
48
52
  @property
49
53
  def level_of_detail(self) -> Literal["low", "medium", "high", "very high"]:
@@ -59,15 +63,10 @@ class RapidataLeaderboard:
59
63
  """
60
64
  Sets the level of detail of the leaderboard.
61
65
  """
62
- logger.debug(f"Setting level of detail to {level_of_detail}")
63
- self.__openapi_service.leaderboard_api.leaderboard_leaderboard_id_response_config_put(
64
- leaderboard_id=self.id,
65
- update_leaderboard_response_config_model=UpdateLeaderboardResponseConfigModel(
66
- responseBudget=DetailMapper.get_budget(level_of_detail),
67
- minResponses=self.__min_responses_per_matchup,
68
- ),
69
- )
70
- self.__response_budget = DetailMapper.get_budget(level_of_detail)
66
+ with tracer.start_as_current_span("RapidataLeaderboard.level_of_detail.setter"):
67
+ logger.debug(f"Setting level of detail to {level_of_detail}")
68
+ self.__response_budget = DetailMapper.get_budget(level_of_detail)
69
+ self._update_config()
71
70
 
72
71
  @property
73
72
  def min_responses_per_matchup(self) -> int:
@@ -81,23 +80,20 @@ class RapidataLeaderboard:
81
80
  """
82
81
  Sets the minimum number of responses required to be considered for the leaderboard.
83
82
  """
84
- if not isinstance(min_responses, int):
85
- raise ValueError("Min responses per matchup must be an integer")
83
+ with tracer.start_as_current_span(
84
+ "RapidataLeaderboard.min_responses_per_matchup.setter"
85
+ ):
86
+ if not isinstance(min_responses, int):
87
+ raise ValueError("Min responses per matchup must be an integer")
86
88
 
87
- if min_responses < 3:
88
- raise ValueError("Min responses per matchup must be at least 3")
89
+ if min_responses < 3:
90
+ raise ValueError("Min responses per matchup must be at least 3")
89
91
 
90
- logger.debug(
91
- f"Setting min responses per matchup to {min_responses} for leaderboard {self.name}"
92
- )
93
- self.__openapi_service.leaderboard_api.leaderboard_leaderboard_id_response_config_put(
94
- leaderboard_id=self.id,
95
- update_leaderboard_response_config_model=UpdateLeaderboardResponseConfigModel(
96
- responseBudget=self.__response_budget,
97
- minResponses=min_responses,
98
- ),
99
- )
100
- self.__min_responses_per_matchup = min_responses
92
+ logger.debug(
93
+ f"Setting min responses per matchup to {min_responses} for leaderboard {self.name}"
94
+ )
95
+ self.__min_responses_per_matchup = min_responses
96
+ self._update_config()
101
97
 
102
98
  @property
103
99
  def show_prompt_asset(self) -> bool:
@@ -134,6 +130,20 @@ class RapidataLeaderboard:
134
130
  """
135
131
  return self.__name
136
132
 
133
+ @name.setter
134
+ def name(self, name: str):
135
+ """
136
+ Sets the name of the leaderboard.
137
+ """
138
+ with tracer.start_as_current_span("RapidataLeaderboard.name.setter"):
139
+ if not isinstance(name, str):
140
+ raise ValueError("Name must be a string")
141
+ if len(name) < 1:
142
+ raise ValueError("Name must be at least 1 character long")
143
+
144
+ self.__name = name
145
+ self._update_config()
146
+
137
147
  def get_standings(self, tags: Optional[list[str]] = None) -> pd.DataFrame:
138
148
  """
139
149
  Returns the standings of the leaderboard.
@@ -146,27 +156,59 @@ class RapidataLeaderboard:
146
156
  Returns:
147
157
  A pandas DataFrame containing the standings of the leaderboard.
148
158
  """
159
+ with tracer.start_as_current_span("RapidataLeaderboard.get_standings"):
160
+ participants = self.__openapi_service.leaderboard_api.leaderboard_leaderboard_id_standings_get(
161
+ leaderboard_id=self.id, tags=tags
162
+ )
149
163
 
150
- participants = self.__openapi_service.leaderboard_api.leaderboard_leaderboard_id_standings_get(
151
- leaderboard_id=self.id, tags=tags
152
- )
153
-
154
- standings = []
155
- for participant in participants.items:
156
- standings.append(
157
- {
158
- "name": participant.name,
159
- "wins": participant.wins,
160
- "total_matches": participant.total_matches,
161
- "score": (
162
- round(participant.score, 2)
163
- if participant.score is not None
164
- else None
165
- ),
166
- }
164
+ standings = []
165
+ for participant in participants.items:
166
+ standings.append(
167
+ {
168
+ "name": participant.name,
169
+ "wins": participant.wins,
170
+ "total_matches": participant.total_matches,
171
+ "score": (
172
+ round(participant.score, 2)
173
+ if participant.score is not None
174
+ else None
175
+ ),
176
+ }
177
+ )
178
+
179
+ return pd.DataFrame(standings)
180
+
181
+ def view(self) -> None:
182
+ """
183
+ Views the leaderboard.
184
+ """
185
+ logger.info("Opening leaderboard page in browser...")
186
+ could_open_browser = webbrowser.open(self.__leaderboard_page)
187
+ if not could_open_browser:
188
+ encoded_url = urllib.parse.quote(
189
+ self.__leaderboard_page, safe="%/:=&?~#+!$,;'@()*[]"
190
+ )
191
+ managed_print(
192
+ Fore.RED
193
+ + f"Please open this URL in your browser: '{encoded_url}'"
194
+ + Fore.RESET
167
195
  )
168
196
 
169
- return pd.DataFrame(standings)
197
+ def _custom_config(self, response_budget: int, min_responses_per_matchup: int):
198
+ self.__response_budget = response_budget
199
+ self.__min_responses_per_matchup = min_responses_per_matchup
200
+ self._update_config()
201
+
202
+ def _update_config(self):
203
+ with tracer.start_as_current_span("RapidataLeaderboard._update_config"):
204
+ self.__openapi_service.leaderboard_api.leaderboard_leaderboard_id_patch(
205
+ leaderboard_id=self.id,
206
+ update_leaderboard_model=UpdateLeaderboardModel(
207
+ name=self.__name,
208
+ responseBudget=self.__response_budget,
209
+ minResponses=self.__min_responses_per_matchup,
210
+ ),
211
+ )
170
212
 
171
213
  def __str__(self) -> str:
172
214
  return f"RapidataLeaderboard(name={self.name}, instruction={self.instruction}, show_prompt={self.show_prompt}, leaderboard_id={self.id})"
@@ -3,15 +3,17 @@ import time
3
3
  from tqdm import tqdm
4
4
 
5
5
  from rapidata.rapidata_client.datapoints.assets import MediaAsset
6
- from rapidata.rapidata_client.logging import logger
7
- from rapidata.rapidata_client.logging.output_manager import RapidataOutputManager
6
+ from rapidata.rapidata_client.config import logger
8
7
  from rapidata.api_client.models.create_sample_model import CreateSampleModel
9
8
  from rapidata.service.openapi_service import OpenAPIService
10
9
  from rapidata.rapidata_client.config.rapidata_config import rapidata_config
11
- from rapidata.rapidata_client.api.rapidata_exception import (
10
+ from rapidata.rapidata_client.api.rapidata_api_client import (
12
11
  suppress_rapidata_error_logging,
13
12
  )
14
13
 
14
+ # Add OpenTelemetry context imports for thread propagation
15
+ from opentelemetry import context as otel_context
16
+
15
17
 
16
18
  class BenchmarkParticipant:
17
19
  def __init__(self, name: str, id: str, openapi_service: OpenAPIService):
@@ -42,7 +44,7 @@ class BenchmarkParticipant:
42
44
  urls = [asset.path]
43
45
 
44
46
  last_exception = None
45
- for attempt in range(rapidata_config.uploadMaxRetries):
47
+ for attempt in range(rapidata_config.upload.maxRetries):
46
48
  try:
47
49
  with suppress_rapidata_error_logging():
48
50
  self.__openapi_service.participant_api.participant_participant_id_sample_post(
@@ -56,7 +58,7 @@ class BenchmarkParticipant:
56
58
 
57
59
  except Exception as e:
58
60
  last_exception = e
59
- if attempt < rapidata_config.uploadMaxRetries - 1:
61
+ if attempt < rapidata_config.upload.maxRetries - 1:
60
62
  # Exponential backoff: wait 1s, then 2s, then 4s
61
63
  retry_delay = 2**attempt
62
64
  time.sleep(retry_delay)
@@ -64,7 +66,7 @@ class BenchmarkParticipant:
64
66
  logger.debug(
65
67
  "Retrying %s of %s...",
66
68
  attempt + 1,
67
- rapidata_config.uploadMaxRetries,
69
+ rapidata_config.upload.maxRetries,
68
70
  )
69
71
 
70
72
  logger.error(f"Upload failed for {identifier}. Error: {str(last_exception)}")
@@ -85,16 +87,31 @@ class BenchmarkParticipant:
85
87
  Returns:
86
88
  tuple[list[str], list[str]]: Lists of successful and failed identifiers
87
89
  """
90
+
91
+ def upload_with_context(
92
+ context: otel_context.Context, asset: MediaAsset, identifier: str
93
+ ) -> tuple[MediaAsset | None, MediaAsset | None]:
94
+ """Wrapper function that runs _process_single_sample_upload with the provided context."""
95
+ token = otel_context.attach(context)
96
+ try:
97
+ return self._process_single_sample_upload(asset, identifier)
98
+ finally:
99
+ otel_context.detach(token)
100
+
88
101
  successful_uploads: list[MediaAsset] = []
89
102
  failed_uploads: list[MediaAsset] = []
90
103
  total_uploads = len(assets)
91
104
 
105
+ # Capture the current OpenTelemetry context before creating threads
106
+ current_context = otel_context.get_current()
107
+
92
108
  with ThreadPoolExecutor(
93
- max_workers=rapidata_config.maxUploadWorkers
109
+ max_workers=rapidata_config.upload.maxWorkers
94
110
  ) as executor:
95
111
  futures = [
96
112
  executor.submit(
97
- self._process_single_sample_upload,
113
+ upload_with_context,
114
+ current_context,
98
115
  asset,
99
116
  identifier,
100
117
  )
@@ -104,7 +121,7 @@ class BenchmarkParticipant:
104
121
  with tqdm(
105
122
  total=total_uploads,
106
123
  desc="Uploading media",
107
- disable=RapidataOutputManager.silent_mode,
124
+ disable=rapidata_config.logging.silent_mode,
108
125
  ) as pbar:
109
126
  for future in as_completed(futures):
110
127
  try: