rapidata 2.37.0__py3-none-any.whl → 2.38.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rapidata might be problematic. Click here for more details.

Files changed (65) hide show
  1. rapidata/__init__.py +3 -4
  2. rapidata/rapidata_client/__init__.py +1 -4
  3. rapidata/rapidata_client/api/{rapidata_exception.py → rapidata_api_client.py} +119 -2
  4. rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py +88 -46
  5. rapidata/rapidata_client/benchmark/participant/_participant.py +26 -9
  6. rapidata/rapidata_client/benchmark/rapidata_benchmark.py +274 -205
  7. rapidata/rapidata_client/benchmark/rapidata_benchmark_manager.py +98 -76
  8. rapidata/rapidata_client/config/__init__.py +3 -0
  9. rapidata/rapidata_client/config/logger.py +135 -0
  10. rapidata/rapidata_client/config/logging_config.py +58 -0
  11. rapidata/rapidata_client/config/managed_print.py +6 -0
  12. rapidata/rapidata_client/config/order_config.py +14 -0
  13. rapidata/rapidata_client/config/rapidata_config.py +14 -9
  14. rapidata/rapidata_client/config/tracer.py +130 -0
  15. rapidata/rapidata_client/config/upload_config.py +14 -0
  16. rapidata/rapidata_client/datapoints/_datapoint.py +1 -1
  17. rapidata/rapidata_client/datapoints/assets/_media_asset.py +1 -1
  18. rapidata/rapidata_client/datapoints/assets/_sessions.py +2 -2
  19. rapidata/rapidata_client/demographic/demographic_manager.py +16 -14
  20. rapidata/rapidata_client/filter/_base_filter.py +11 -5
  21. rapidata/rapidata_client/filter/age_filter.py +9 -3
  22. rapidata/rapidata_client/filter/and_filter.py +20 -5
  23. rapidata/rapidata_client/filter/campaign_filter.py +7 -1
  24. rapidata/rapidata_client/filter/country_filter.py +8 -2
  25. rapidata/rapidata_client/filter/custom_filter.py +9 -3
  26. rapidata/rapidata_client/filter/gender_filter.py +9 -3
  27. rapidata/rapidata_client/filter/language_filter.py +12 -5
  28. rapidata/rapidata_client/filter/new_user_filter.py +3 -4
  29. rapidata/rapidata_client/filter/not_filter.py +17 -5
  30. rapidata/rapidata_client/filter/or_filter.py +20 -5
  31. rapidata/rapidata_client/filter/response_count_filter.py +6 -0
  32. rapidata/rapidata_client/filter/user_score_filter.py +17 -5
  33. rapidata/rapidata_client/order/_rapidata_dataset.py +45 -17
  34. rapidata/rapidata_client/order/_rapidata_order_builder.py +19 -13
  35. rapidata/rapidata_client/order/rapidata_order.py +60 -48
  36. rapidata/rapidata_client/order/rapidata_order_manager.py +231 -197
  37. rapidata/rapidata_client/order/rapidata_results.py +71 -57
  38. rapidata/rapidata_client/rapidata_client.py +36 -23
  39. rapidata/rapidata_client/selection/_base_selection.py +6 -0
  40. rapidata/rapidata_client/selection/static_selection.py +5 -10
  41. rapidata/rapidata_client/settings/_rapidata_setting.py +8 -0
  42. rapidata/rapidata_client/settings/alert_on_fast_response.py +8 -5
  43. rapidata/rapidata_client/settings/free_text_minimum_characters.py +9 -4
  44. rapidata/rapidata_client/validation/rapidata_validation_set.py +20 -16
  45. rapidata/rapidata_client/validation/rapids/rapids.py +7 -1
  46. rapidata/rapidata_client/validation/validation_set_manager.py +285 -268
  47. rapidata/rapidata_client/workflow/_base_workflow.py +6 -1
  48. rapidata/rapidata_client/workflow/_classify_workflow.py +6 -0
  49. rapidata/rapidata_client/workflow/_compare_workflow.py +6 -0
  50. rapidata/rapidata_client/workflow/_draw_workflow.py +6 -0
  51. rapidata/rapidata_client/workflow/_evaluation_workflow.py +6 -0
  52. rapidata/rapidata_client/workflow/_free_text_workflow.py +6 -0
  53. rapidata/rapidata_client/workflow/_locate_workflow.py +6 -0
  54. rapidata/rapidata_client/workflow/_ranking_workflow.py +12 -0
  55. rapidata/rapidata_client/workflow/_select_words_workflow.py +6 -0
  56. rapidata/rapidata_client/workflow/_timestamp_workflow.py +6 -0
  57. rapidata/service/credential_manager.py +1 -1
  58. rapidata/service/openapi_service.py +2 -2
  59. {rapidata-2.37.0.dist-info → rapidata-2.38.0.dist-info}/METADATA +4 -1
  60. {rapidata-2.37.0.dist-info → rapidata-2.38.0.dist-info}/RECORD +62 -59
  61. rapidata/rapidata_client/logging/__init__.py +0 -2
  62. rapidata/rapidata_client/logging/logger.py +0 -122
  63. rapidata/rapidata_client/logging/output_manager.py +0 -20
  64. {rapidata-2.37.0.dist-info → rapidata-2.38.0.dist-info}/LICENSE +0 -0
  65. {rapidata-2.37.0.dist-info → rapidata-2.38.0.dist-info}/WHEEL +0 -0
@@ -9,6 +9,7 @@ from rapidata.api_client.models.filter import Filter
9
9
  from rapidata.api_client.models.sort_criterion import SortCriterion
10
10
  from rapidata.api_client.models.sort_direction import SortDirection
11
11
  from rapidata.api_client.models.filter_operator import FilterOperator
12
+ from rapidata.rapidata_client.config import logger, tracer
12
13
 
13
14
 
14
15
  class RapidataBenchmarkManager:
@@ -58,78 +59,96 @@ class RapidataBenchmarkManager:
58
59
  benchmark = create_new_benchmark(name=name, identifiers=identifiers, prompts=prompts, prompt_assets=prompt_assets, tags=tags)
59
60
  ```
60
61
  """
61
- if not isinstance(name, str):
62
- raise ValueError("Name must be a string.")
63
-
64
- if prompts and (
65
- not isinstance(prompts, list)
66
- or not all(isinstance(prompt, str) or prompt is None for prompt in prompts)
62
+ with tracer.start_as_current_span(
63
+ "RapidataBenchmarkManager.create_new_benchmark"
67
64
  ):
68
- raise ValueError("Prompts must be a list of strings or None.")
69
-
70
- if prompt_assets and (
71
- not isinstance(prompt_assets, list)
72
- or not all(
73
- isinstance(asset, str) or asset is None for asset in prompt_assets
65
+ if not isinstance(name, str):
66
+ raise ValueError("Name must be a string.")
67
+
68
+ if prompts and (
69
+ not isinstance(prompts, list)
70
+ or not all(
71
+ isinstance(prompt, str) or prompt is None for prompt in prompts
72
+ )
73
+ ):
74
+ raise ValueError("Prompts must be a list of strings or None.")
75
+
76
+ if prompt_assets and (
77
+ not isinstance(prompt_assets, list)
78
+ or not all(
79
+ isinstance(asset, str) or asset is None for asset in prompt_assets
80
+ )
81
+ ):
82
+ raise ValueError("Media assets must be a list of strings or None.")
83
+
84
+ if not isinstance(identifiers, list) or not all(
85
+ isinstance(identifier, str) for identifier in identifiers
86
+ ):
87
+ raise ValueError("Identifiers must be a list of strings.")
88
+
89
+ if prompts and len(identifiers) != len(prompts):
90
+ raise ValueError("Identifiers and prompts must have the same length.")
91
+
92
+ if prompt_assets and len(identifiers) != len(prompt_assets):
93
+ raise ValueError(
94
+ "Identifiers and media assets must have the same length."
95
+ )
96
+
97
+ if not prompts and not prompt_assets:
98
+ raise ValueError(
99
+ "At least one of prompts or media assets must be provided."
100
+ )
101
+
102
+ if len(set(identifiers)) != len(identifiers):
103
+ raise ValueError("Identifiers must be unique.")
104
+
105
+ if tags and len(identifiers) != len(tags):
106
+ raise ValueError("Identifiers and tags must have the same length.")
107
+
108
+ logger.info("Creating new benchmark %s", name)
109
+
110
+ benchmark_result = self.__openapi_service.benchmark_api.benchmark_post(
111
+ create_benchmark_model=CreateBenchmarkModel(
112
+ name=name,
113
+ )
74
114
  )
75
- ):
76
- raise ValueError("Media assets must be a list of strings or None.")
77
115
 
78
- if not isinstance(identifiers, list) or not all(
79
- isinstance(identifier, str) for identifier in identifiers
80
- ):
81
- raise ValueError("Identifiers must be a list of strings.")
116
+ logger.info("Benchmark created with id %s", benchmark_result.id)
82
117
 
83
- if prompts and len(identifiers) != len(prompts):
84
- raise ValueError("Identifiers and prompts must have the same length.")
85
-
86
- if prompt_assets and len(identifiers) != len(prompt_assets):
87
- raise ValueError("Identifiers and media assets must have the same length.")
88
-
89
- if not prompts and not prompt_assets:
90
- raise ValueError(
91
- "At least one of prompts or media assets must be provided."
118
+ benchmark = RapidataBenchmark(
119
+ name, benchmark_result.id, self.__openapi_service
92
120
  )
93
121
 
94
- if len(set(identifiers)) != len(identifiers):
95
- raise ValueError("Identifiers must be unique.")
96
-
97
- if tags and len(identifiers) != len(tags):
98
- raise ValueError("Identifiers and tags must have the same length.")
99
-
100
- benchmark_result = self.__openapi_service.benchmark_api.benchmark_post(
101
- create_benchmark_model=CreateBenchmarkModel(
102
- name=name,
122
+ prompts_list = prompts if prompts is not None else [None] * len(identifiers)
123
+ media_assets_list = (
124
+ prompt_assets
125
+ if prompt_assets is not None
126
+ else [None] * len(identifiers)
103
127
  )
104
- )
105
-
106
- benchmark = RapidataBenchmark(name, benchmark_result.id, self.__openapi_service)
107
-
108
- prompts_list = prompts if prompts is not None else [None] * len(identifiers)
109
- media_assets_list = (
110
- prompt_assets if prompt_assets is not None else [None] * len(identifiers)
111
- )
112
- tags_list = tags if tags is not None else [None] * len(identifiers)
128
+ tags_list = tags if tags is not None else [None] * len(identifiers)
113
129
 
114
- for identifier, prompt, asset, tag in zip(
115
- identifiers, prompts_list, media_assets_list, tags_list
116
- ):
117
- benchmark.add_prompt(identifier, prompt, asset, tag)
130
+ for identifier, prompt, asset, tag in zip(
131
+ identifiers, prompts_list, media_assets_list, tags_list
132
+ ):
133
+ benchmark.add_prompt(identifier, prompt, asset, tag)
118
134
 
119
- return benchmark
135
+ return benchmark
120
136
 
121
137
  def get_benchmark_by_id(self, id: str) -> RapidataBenchmark:
122
138
  """
123
139
  Returns a benchmark by its ID.
124
140
  """
125
- benchmark_result = (
126
- self.__openapi_service.benchmark_api.benchmark_benchmark_id_get(
127
- benchmark_id=id
141
+ with tracer.start_as_current_span(
142
+ "RapidataBenchmarkManager.get_benchmark_by_id"
143
+ ):
144
+ benchmark_result = (
145
+ self.__openapi_service.benchmark_api.benchmark_benchmark_id_get(
146
+ benchmark_id=id
147
+ )
148
+ )
149
+ return RapidataBenchmark(
150
+ benchmark_result.name, benchmark_result.id, self.__openapi_service
128
151
  )
129
- )
130
- return RapidataBenchmark(
131
- benchmark_result.name, benchmark_result.id, self.__openapi_service
132
- )
133
152
 
134
153
  def find_benchmarks(
135
154
  self, name: str = "", amount: int = 10
@@ -137,24 +156,27 @@ class RapidataBenchmarkManager:
137
156
  """
138
157
  Returns a list of benchmarks by their name.
139
158
  """
140
- benchmark_result = self.__openapi_service.benchmark_api.benchmarks_get(
141
- QueryModel(
142
- page=PageInfo(index=1, size=amount),
143
- filter=RootFilter(
144
- filters=[
145
- Filter(
146
- field="Name", operator=FilterOperator.CONTAINS, value=name
159
+ with tracer.start_as_current_span("RapidataBenchmarkManager.find_benchmarks"):
160
+ benchmark_result = self.__openapi_service.benchmark_api.benchmarks_get(
161
+ QueryModel(
162
+ page=PageInfo(index=1, size=amount),
163
+ filter=RootFilter(
164
+ filters=[
165
+ Filter(
166
+ field="Name",
167
+ operator=FilterOperator.CONTAINS,
168
+ value=name,
169
+ )
170
+ ]
171
+ ),
172
+ sortCriteria=[
173
+ SortCriterion(
174
+ direction=SortDirection.DESC, propertyName="CreatedAt"
147
175
  )
148
- ]
149
- ),
150
- sortCriteria=[
151
- SortCriterion(
152
- direction=SortDirection.DESC, propertyName="CreatedAt"
153
- )
154
- ],
176
+ ],
177
+ )
155
178
  )
156
- )
157
- return [
158
- RapidataBenchmark(benchmark.name, benchmark.id, self.__openapi_service)
159
- for benchmark in benchmark_result.items
160
- ]
179
+ return [
180
+ RapidataBenchmark(benchmark.name, benchmark.id, self.__openapi_service)
181
+ for benchmark in benchmark_result.items
182
+ ]
@@ -1 +1,4 @@
1
+ from .logger import logger # has to be instantiated before rapidata_config
2
+ from .tracer import tracer # has to be instantiated before rapidata_config
1
3
  from .rapidata_config import rapidata_config
4
+ from .managed_print import managed_print
@@ -0,0 +1,135 @@
1
+ import logging
2
+ from typing import Protocol, runtime_checkable
3
+ from opentelemetry._logs import set_logger_provider
4
+ from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
5
+ from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
6
+ from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
7
+ from opentelemetry.sdk.resources import Resource
8
+ from rapidata import __version__
9
+ from .logging_config import LoggingConfig, register_config_handler
10
+
11
+
12
+ @runtime_checkable
13
+ class LoggerProtocol(Protocol):
14
+ """Protocol that defines the logger interface for type checking."""
15
+
16
+ def debug(self, msg: object, *args, **kwargs) -> None: ...
17
+ def info(self, msg: object, *args, **kwargs) -> None: ...
18
+ def warning(self, msg: object, *args, **kwargs) -> None: ...
19
+ def warn(self, msg: object, *args, **kwargs) -> None: ...
20
+ def error(self, msg: object, *args, **kwargs) -> None: ...
21
+ def exception(self, msg: object, *args, exc_info=True, **kwargs) -> None: ...
22
+ def critical(self, msg: object, *args, **kwargs) -> None: ...
23
+ def fatal(self, msg: object, *args, **kwargs) -> None: ...
24
+ def log(self, level: int, msg: object, *args, **kwargs) -> None: ...
25
+ def isEnabledFor(self, level: int) -> bool: ...
26
+ def getEffectiveLevel(self) -> int: ...
27
+ def setLevel(self, level: int | str) -> None: ...
28
+ def addHandler(self, handler: logging.Handler) -> None: ...
29
+ def removeHandler(self, handler: logging.Handler) -> None: ...
30
+ @property
31
+ def handlers(self) -> list[logging.Handler]: ...
32
+ @property
33
+ def level(self) -> int: ...
34
+ @property
35
+ def name(self) -> str: ...
36
+
37
+
38
class RapidataLogger:
    """Logger wrapper that rebuilds its handlers whenever the config changes.

    The wrapper owns a standard-library logger and registers itself as a
    configuration handler. Each configuration update replaces the console
    handler (and the optional file handler) and attaches or detaches the
    OTLP handler according to ``config.enable_otlp``. Any attribute that is
    not defined here is looked up on the wrapped logger.
    """

    def __init__(self, name: str = "rapidata"):
        self._logger = logging.getLogger(name)
        self._otlp_initialized = False
        self._otlp_handler = None
        # Console/file handlers created by this wrapper. They are closed when
        # they get replaced so that file streams are not leaked.
        self._owned_handlers: list[logging.Handler] = []

        # Register this logger to receive configuration updates
        register_config_handler(self._handle_config_update)

    def _handle_config_update(self, config: "LoggingConfig") -> None:
        """Handle configuration updates."""
        self._update_logger(config)

    def _init_otlp(self) -> None:
        """Create the OTLP logger provider and handler (best effort).

        A failure is reported through the wrapped logger and printed as a
        traceback; it never propagates, and ``_otlp_initialized`` stays
        ``False`` so that a later configuration update tries again.
        """
        try:
            logger_provider = LoggerProvider(
                resource=Resource.create(
                    {
                        "service.name": "Rapidata.Python.SDK",
                        "service.version": __version__,
                    }
                ),
            )
            set_logger_provider(logger_provider)

            exporter = OTLPLogExporter(
                endpoint="https://otlp-sdk.rapidata.ai/v1/logs",
                timeout=30,
            )

            processor = BatchLogRecordProcessor(
                exporter,
                max_queue_size=2048,
                export_timeout_millis=30000,
                max_export_batch_size=512,
            )

            logger_provider.add_log_record_processor(processor)

            # OTLP handler - captures DEBUG and above
            self._otlp_handler = LoggingHandler(logger_provider=logger_provider)
            self._otlp_handler.setLevel(logging.DEBUG)  # OTLP gets everything

            self._otlp_initialized = True

        except Exception as e:
            self._logger.warning("Failed to initialize OTLP logging: %s", e)
            import traceback

            traceback.print_exc()

    def _update_logger(self, config: "LoggingConfig") -> None:
        """Update the logger based on the new configuration.

        Args:
            config: Object providing ``enable_otlp``, ``level``, ``format``
                and ``log_file``.

        Raises:
            AttributeError: If ``config.level`` does not name an attribute of
                the ``logging`` module. This is raised before any handler is
                touched, so the previous setup stays in place.
        """
        # Initialize OTLP logging only once and only if not disabled
        if not self._otlp_initialized and config.enable_otlp:
            self._init_otlp()

        # Console handler with configurable level
        console_handler = logging.StreamHandler()
        console_level = getattr(logging, config.level.upper())
        console_handler.setLevel(console_level)
        console_handler.setFormatter(logging.Formatter(config.format))

        # Configure the logger
        self._logger.setLevel(logging.DEBUG)  # Logger must allow DEBUG for OTLP

        # Remove any existing handlers (except OTLP when appropriate)
        for handler in self._logger.handlers[:]:
            if handler is not self._otlp_handler or not config.enable_otlp:
                self._logger.removeHandler(handler)

        # Close the handlers this wrapper created on the previous update.
        # The OTLP handler is never in this list because it is reused.
        stale_handlers, self._owned_handlers = self._owned_handlers, []
        for handler in stale_handlers:
            handler.close()

        # Add OTLP handler if initialized and not disabled
        if (
            self._otlp_handler
            and self._otlp_handler not in self._logger.handlers
            and config.enable_otlp
        ):
            self._logger.addHandler(self._otlp_handler)

        # Add console handler
        self._logger.addHandler(console_handler)
        self._owned_handlers.append(console_handler)

        # Add file handler if log_file is provided
        if config.log_file:
            file_handler = logging.FileHandler(config.log_file)
            file_handler.setLevel(console_level)  # Use same level as console
            file_handler.setFormatter(logging.Formatter(config.format))
            self._logger.addHandler(file_handler)
            self._owned_handlers.append(file_handler)

    def __getattr__(self, name: str) -> object:
        """Delegate attribute access to the underlying logger."""
        # __getattr__ only runs when normal lookup fails. If "_logger" itself
        # is missing (instance created without __init__), delegating would
        # re-enter this method forever.
        if name == "_logger":
            raise AttributeError(name)
        return getattr(self._logger, name)
133
+
134
+
135
# Module-level logger instance. It is a RapidataLogger, which forwards unknown
# attributes to a standard-library logger; the LoggerProtocol annotation lets
# type checkers see the logging methods, hence the ignore on the assignment.
logger: LoggerProtocol = RapidataLogger()  # type: ignore[assignment]
@@ -0,0 +1,58 @@
1
+ from typing import Callable
2
+ from pydantic import BaseModel, Field
3
+
4
# Type alias for config update handlers: a callable that receives the
# LoggingConfig instance and returns nothing.
ConfigUpdateHandler = Callable[["LoggingConfig"], None]

# Global list to store registered handlers, kept in registration order.
_config_handlers: list[ConfigUpdateHandler] = []
9
+
10
+
11
def register_config_handler(handler: ConfigUpdateHandler) -> None:
    """Register a handler to be called when the logging configuration updates.

    Args:
        handler: Callable that accepts the ``LoggingConfig`` instance.
            Registering the same handler twice stores it twice, because no
            duplicate check is made.
    """
    _config_handlers.append(handler)
14
+
15
+
16
def unregister_config_handler(handler: ConfigUpdateHandler) -> None:
    """Remove one registration of ``handler``; unknown handlers are ignored.

    Args:
        handler: The callable previously passed to ``register_config_handler``.
    """
    try:
        _config_handlers.remove(handler)
    except ValueError:
        # The handler was never registered (or was already removed).
        pass
20
+
21
+
22
class LoggingConfig(BaseModel):
    """
    Holds the configuration for the logging process.

    Every construction and every attribute assignment notifies the handlers
    registered through ``register_config_handler``.

    Args:
        level (str): The logging level. Defaults to "WARNING".
        log_file (str | None): The logging file. Defaults to None.
        format (str): The logging format. Defaults to "%(asctime)s - %(name)s - %(levelname)s - %(message)s".
        silent_mode (bool): Whether to disable the prints and progress bars. Does NOT affect the logging. Defaults to False.
        enable_otlp (bool): Whether to enable the OpenTelemetry (OTLP) export of logs and traces. Defaults to True.
    """

    level: str = Field(default="WARNING")
    log_file: str | None = Field(default=None)
    format: str = Field(default="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    silent_mode: bool = Field(default=False)
    enable_otlp: bool = Field(default=True)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._notify_handlers()

    def __setattr__(self, name: str, value) -> None:
        super().__setattr__(name, value)
        self._notify_handlers()

    def _notify_handlers(self) -> None:
        """Notify all registered handlers that the configuration has updated."""
        # Iterate over a snapshot: a handler may register or unregister
        # handlers while it runs, and mutating the list during iteration
        # would skip or repeat entries.
        for handler in tuple(_config_handlers):
            try:
                handler(self)
            except Exception as e:
                # Report the error but don't let one handler failure break others.
                # print() is used because the logger itself is one of the handlers.
                print(f"Warning: Config handler failed: {e}")
56
+
57
+
58
+ # Tracer is now handled in tracer.py with event-based updates
@@ -0,0 +1,6 @@
1
+ from rapidata.rapidata_client.config import rapidata_config
2
+
3
+
4
def managed_print(*args, **kwargs) -> None:
    """Forward the call to ``print`` unless silent mode is switched on.

    All positional and keyword arguments are passed through unchanged.
    """
    if rapidata_config.logging.silent_mode:
        return
    print(*args, **kwargs)
@@ -0,0 +1,14 @@
1
+ from pydantic import BaseModel, Field
2
+
3
+
4
class OrderConfig(BaseModel):
    """
    Settings that apply while an order is being created.

    Args:
        minOrderDatapointsForValidation (int): The minimum number of datapoints an order needs before a validation set is created automatically when no recommended one was found. Defaults to 50.
        autoValidationSetSize (int): The maximum size of the auto-generated validation set. Defaults to 20.
    """

    minOrderDatapointsForValidation: int = 50
    autoValidationSetSize: int = 20
@@ -1,5 +1,9 @@
1
1
  from pydantic import BaseModel, Field
2
2
 
3
+ from rapidata.rapidata_client.config.logging_config import LoggingConfig
4
+ from rapidata.rapidata_client.config.order_config import OrderConfig
5
+ from rapidata.rapidata_client.config.upload_config import UploadConfig
6
+
3
7
 
4
8
  class RapidataConfig(BaseModel):
5
9
  """
@@ -8,24 +12,25 @@ class RapidataConfig(BaseModel):
8
12
  To adjust the configurations used, you can modify the `rapidata_config` object.
9
13
 
10
14
  Args:
11
- maxUploadWorkers (int): The maximum number of worker threads for processing media paths. Defaults to 10.
12
- uploadMaxRetries (int): The maximum number of retries for failed uploads. Defaults to 3.
13
15
  enableBetaFeatures (bool): Whether to enable beta features. Defaults to False.
14
- minOrderDatapointsForValidation (int): The minimum number of datapoints required so that an automatic validationset gets created if no recommended was found. Defaults to 50.
15
- autoValidationSetSize (int): The maximum size of the auto-generated validation set. Defaults to 20.
16
+ upload (UploadConfig): The configuration for the upload process.
17
+ Such as the maximum number of worker threads for processing media paths and the maximum number of retries for failed uploads.
18
+ order (OrderConfig): The configuration for the order process.
19
+ Such as the minimum number of datapoints required so that an automatic validationset gets created if no recommended was found.
20
+ logging (LoggingConfig): The configuration for the logging process.
21
+ Such as the logging level and the logging file.
16
22
 
17
23
  Example:
18
24
  ```python
19
25
  from rapidata import rapidata_config
20
- rapidata_config.maxUploadWorkers = 20
26
+ rapidata_config.upload.maxWorkers = 20
21
27
  ```
22
28
  """
23
29
 
24
- maxUploadWorkers: int = Field(default=10)
25
- uploadMaxRetries: int = Field(default=3)
26
30
  enableBetaFeatures: bool = False
27
- minOrderDatapointsForValidation: int = Field(default=50)
28
- autoValidationSetSize: int = Field(default=20)
31
+ upload: UploadConfig = Field(default_factory=UploadConfig)
32
+ order: OrderConfig = Field(default_factory=OrderConfig)
33
+ logging: LoggingConfig = Field(default_factory=LoggingConfig)
29
34
 
30
35
 
31
36
  rapidata_config = RapidataConfig()
@@ -0,0 +1,130 @@
1
+ from typing import Protocol, runtime_checkable, Any
2
+ from opentelemetry import trace
3
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
4
+ from opentelemetry.sdk.trace import TracerProvider
5
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
6
+ from opentelemetry.sdk.resources import Resource
7
+ from rapidata import __version__
8
+ from .logging_config import LoggingConfig, register_config_handler
9
+ from rapidata.rapidata_client.config import logger
10
+
11
+
12
@runtime_checkable
class TracerProtocol(Protocol):
    """Structural description of the tracer surface exposed by this module.

    Both members take the span name first and pass any further arguments
    through; the return type is left open.
    """

    def start_span(self, name: str, *args, **kwargs) -> Any:
        ...

    def start_as_current_span(self, name: str, *args, **kwargs) -> Any:
        ...
18
+
19
+
20
class NoOpSpan:
    """A no-op span that does nothing when tracing is disabled.

    It can be used as a context manager and accepts the usual span calls.
    Any other method name resolves to a callable that returns the span
    itself, so chained calls keep working.
    """

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Returns None, so exceptions raised inside the ``with`` body propagate.
        pass

    def set_attribute(self, *args, **kwargs):
        pass

    def set_status(self, *args, **kwargs):
        pass

    def add_event(self, *args, **kwargs):
        pass

    def end(self, *args, **kwargs):
        pass

    def is_recording(self) -> bool:
        """Return ``False``: nothing is ever recorded by this span.

        Without this method the catch-all below would hand back the span
        itself, which is truthy, so ``if span.is_recording():`` guards would
        run while tracing is off.
        """
        return False

    def __getattr__(self, name: str) -> Any:
        """Return self for any method call to maintain chainability."""
        # Do not answer protocol probes such as ``__wrapped__`` or
        # ``__deepcopy__``; pretending to implement them confuses callers
        # that feature-detect with getattr/hasattr.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return lambda *args, **kwargs: self
44
+
45
+
46
class NoOpTracer:
    """Stand-in tracer used while tracing is off; every call yields a NoOpSpan."""

    def start_span(self, name: str, *args, **kwargs) -> NoOpSpan:
        span = NoOpSpan()
        return span

    def start_as_current_span(self, name: str, *args, **kwargs) -> NoOpSpan:
        span = NoOpSpan()
        return span

    def __getattr__(self, name: str) -> Any:
        """Resolve any other attribute to a callable that produces a fresh NoOpSpan."""

        def _make_span(*args, **kwargs):
            return NoOpSpan()

        return _make_span
58
+
59
+
60
class RapidataTracer:
    """Tracer implementation that updates when the configuration changes.

    The instance registers itself as a configuration handler. While
    ``enable_otlp`` is true and the OTLP setup succeeded, calls go to the
    real tracer; otherwise they go to a ``NoOpTracer``.
    """

    # Attributes read inside __getattr__; they must never be delegated.
    _OWN_STATE = ("_enabled", "_real_tracer", "_no_op_tracer")

    def __init__(self, name: str = __name__):
        self._name = name
        self._otlp_initialized = False
        self._tracer_provider = None
        self._real_tracer = None
        self._no_op_tracer = NoOpTracer()
        self._enabled = True  # Default to enabled

        # Register this tracer to receive configuration updates
        register_config_handler(self._handle_config_update)

    def _handle_config_update(self, config: "LoggingConfig") -> None:
        """Handle configuration updates."""
        self._update_tracer(config)

    def _update_tracer(self, config: "LoggingConfig") -> None:
        """Update the tracer based on the new configuration.

        Args:
            config: Object providing ``enable_otlp``.
        """
        self._enabled = config.enable_otlp

        # Initialize OTLP tracing only once and only if not disabled
        if not self._otlp_initialized and config.enable_otlp:
            try:
                resource = Resource.create(
                    {
                        "service.name": "Rapidata.Python.SDK",
                        "service.version": __version__,
                    }
                )

                self._tracer_provider = TracerProvider(resource=resource)
                trace.set_tracer_provider(self._tracer_provider)

                exporter = OTLPSpanExporter(
                    endpoint="https://otlp-sdk.rapidata.ai/v1/traces",
                    timeout=30,
                )

                span_processor = BatchSpanProcessor(exporter)
                self._tracer_provider.add_span_processor(span_processor)

                self._real_tracer = trace.get_tracer(self._name)
                self._otlp_initialized = True

            except Exception as e:
                # Best effort: tracing must never break the SDK, so fall back
                # to the no-op tracer.
                logger.warning("Failed to initialize tracing: %s", e)
                self._enabled = False

    def _active_tracer(self) -> Any:
        """Return the real tracer when enabled and available, else the no-op one."""
        if self._enabled and self._real_tracer:
            return self._real_tracer
        return self._no_op_tracer

    def start_span(self, name: str, *args, **kwargs) -> Any:
        """Start a span, or return a no-op span if tracing is disabled."""
        return self._active_tracer().start_span(name, *args, **kwargs)

    def start_as_current_span(self, name: str, *args, **kwargs) -> Any:
        """Start a span as current, or return a no-op span if tracing is disabled."""
        return self._active_tracer().start_as_current_span(name, *args, **kwargs)

    def __getattr__(self, name: str) -> Any:
        """Delegate attribute access to the appropriate tracer."""
        # __getattr__ only runs when normal lookup fails. If one of our own
        # state attributes is missing (instance created without __init__),
        # reading it below would re-enter this method forever.
        if name in self._OWN_STATE:
            raise AttributeError(name)
        return getattr(self._active_tracer(), name)
126
+ return getattr(self._no_op_tracer, name)
127
+
128
+
129
# Create the main tracer instance - type checkers will see it as TracerProtocol.
# The object is a RapidataTracer, which is not declared as a TracerProtocol
# subclass, hence the ignore on the assignment.
tracer: TracerProtocol = RapidataTracer()  # type: ignore[assignment]
@@ -0,0 +1,14 @@
1
+ from pydantic import BaseModel, Field
2
+
3
+
4
class UploadConfig(BaseModel):
    """
    Settings that apply while media is being uploaded.

    Args:
        maxWorkers (int): The maximum number of worker threads for processing media paths. Defaults to 10.
        maxRetries (int): The maximum number of retries for failed uploads. Defaults to 3.
    """

    maxWorkers: int = 10
    maxRetries: int = 3
@@ -24,7 +24,7 @@ from rapidata.rapidata_client.datapoints.metadata._media_asset_metadata import (
24
24
  MediaAssetMetadata,
25
25
  )
26
26
  from rapidata.rapidata_client.datapoints.metadata._prompt_metadata import PromptMetadata
27
- from rapidata.rapidata_client.logging import logger
27
+ from rapidata.rapidata_client.config import logger
28
28
 
29
29
 
30
30
  class Datapoint:
@@ -17,7 +17,7 @@ from pydantic import StrictStr, StrictBytes
17
17
  import logging
18
18
  from functools import cached_property
19
19
  from rapidata.rapidata_client.datapoints.assets._sessions import SessionManager
20
- from rapidata.rapidata_client.logging import logger
20
+ from rapidata.rapidata_client.config import logger
21
21
  from rapidata.rapidata_client.datapoints.assets.constants import (
22
22
  ALLOWED_IMAGE_EXTENSIONS,
23
23
  ALLOWED_MEDIA_EXTENSIONS,
@@ -18,8 +18,8 @@ class SessionManager:
18
18
  requests.Session: A singleton requests session with retry logic.
19
19
  """
20
20
  if cls._session is None:
21
- max_retries: int = rapidata_config.uploadMaxRetries
22
- max_workers: int = rapidata_config.maxUploadWorkers
21
+ max_retries: int = rapidata_config.upload.maxRetries
22
+ max_workers: int = rapidata_config.upload.maxWorkers
23
23
  cls._session = requests.Session()
24
24
  retries = Retry(
25
25
  total=max_retries,