rapidata 2.37.0__py3-none-any.whl → 2.39.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rapidata might be problematic. Click here for more details.

Files changed (117) hide show
  1. rapidata/__init__.py +3 -4
  2. rapidata/api_client/__init__.py +4 -5
  3. rapidata/api_client/api/benchmark_api.py +289 -3
  4. rapidata/api_client/api/leaderboard_api.py +35 -1
  5. rapidata/api_client/api/participant_api.py +289 -3
  6. rapidata/api_client/api/validation_set_api.py +119 -400
  7. rapidata/api_client/models/__init__.py +4 -5
  8. rapidata/api_client/models/ab_test_selection_a_inner.py +1 -1
  9. rapidata/api_client/models/compare_workflow_model1.py +1 -8
  10. rapidata/api_client/models/conditional_validation_selection.py +4 -9
  11. rapidata/api_client/models/confidence_interval.py +98 -0
  12. rapidata/api_client/models/create_simple_pipeline_model_pipeline_steps_inner.py +8 -22
  13. rapidata/api_client/models/get_standing_by_id_result.py +7 -2
  14. rapidata/api_client/models/get_validation_set_by_id_result.py +4 -2
  15. rapidata/api_client/models/simple_workflow_model1.py +1 -8
  16. rapidata/api_client/models/standing_by_leaderboard.py +10 -4
  17. rapidata/api_client/models/update_benchmark_model.py +87 -0
  18. rapidata/api_client/models/update_participant_model.py +87 -0
  19. rapidata/api_client/models/update_validation_set_model.py +93 -0
  20. rapidata/api_client/models/validation_chance.py +20 -3
  21. rapidata/api_client/models/validation_set_model.py +5 -42
  22. rapidata/api_client_README.md +7 -7
  23. rapidata/rapidata_client/__init__.py +1 -4
  24. rapidata/rapidata_client/api/{rapidata_exception.py → rapidata_api_client.py} +119 -2
  25. rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py +88 -46
  26. rapidata/rapidata_client/benchmark/participant/_participant.py +26 -9
  27. rapidata/rapidata_client/benchmark/rapidata_benchmark.py +310 -210
  28. rapidata/rapidata_client/benchmark/rapidata_benchmark_manager.py +134 -75
  29. rapidata/rapidata_client/config/__init__.py +3 -0
  30. rapidata/rapidata_client/config/logger.py +135 -0
  31. rapidata/rapidata_client/config/logging_config.py +58 -0
  32. rapidata/rapidata_client/config/managed_print.py +6 -0
  33. rapidata/rapidata_client/config/order_config.py +14 -0
  34. rapidata/rapidata_client/config/rapidata_config.py +15 -10
  35. rapidata/rapidata_client/config/tracer.py +130 -0
  36. rapidata/rapidata_client/config/upload_config.py +14 -0
  37. rapidata/rapidata_client/datapoints/_datapoint.py +1 -1
  38. rapidata/rapidata_client/datapoints/assets/__init__.py +1 -0
  39. rapidata/rapidata_client/datapoints/assets/_base_asset.py +2 -0
  40. rapidata/rapidata_client/datapoints/assets/_media_asset.py +1 -1
  41. rapidata/rapidata_client/datapoints/assets/_sessions.py +2 -2
  42. rapidata/rapidata_client/datapoints/assets/_text_asset.py +2 -2
  43. rapidata/rapidata_client/datapoints/assets/data_type_enum.py +1 -1
  44. rapidata/rapidata_client/datapoints/metadata/_media_asset_metadata.py +9 -8
  45. rapidata/rapidata_client/datapoints/metadata/_prompt_metadata.py +1 -2
  46. rapidata/rapidata_client/demographic/demographic_manager.py +16 -14
  47. rapidata/rapidata_client/filter/_base_filter.py +11 -5
  48. rapidata/rapidata_client/filter/age_filter.py +9 -3
  49. rapidata/rapidata_client/filter/and_filter.py +20 -5
  50. rapidata/rapidata_client/filter/campaign_filter.py +7 -1
  51. rapidata/rapidata_client/filter/country_filter.py +8 -2
  52. rapidata/rapidata_client/filter/custom_filter.py +9 -3
  53. rapidata/rapidata_client/filter/gender_filter.py +9 -3
  54. rapidata/rapidata_client/filter/language_filter.py +12 -5
  55. rapidata/rapidata_client/filter/models/age_group.py +4 -4
  56. rapidata/rapidata_client/filter/models/gender.py +4 -2
  57. rapidata/rapidata_client/filter/new_user_filter.py +3 -4
  58. rapidata/rapidata_client/filter/not_filter.py +17 -5
  59. rapidata/rapidata_client/filter/or_filter.py +20 -5
  60. rapidata/rapidata_client/filter/rapidata_filters.py +12 -9
  61. rapidata/rapidata_client/filter/response_count_filter.py +6 -0
  62. rapidata/rapidata_client/filter/user_score_filter.py +17 -5
  63. rapidata/rapidata_client/order/_rapidata_dataset.py +45 -17
  64. rapidata/rapidata_client/order/_rapidata_order_builder.py +19 -13
  65. rapidata/rapidata_client/order/rapidata_order.py +60 -48
  66. rapidata/rapidata_client/order/rapidata_order_manager.py +231 -197
  67. rapidata/rapidata_client/order/rapidata_results.py +71 -57
  68. rapidata/rapidata_client/rapidata_client.py +36 -23
  69. rapidata/rapidata_client/referee/__init__.py +1 -1
  70. rapidata/rapidata_client/referee/_base_referee.py +3 -1
  71. rapidata/rapidata_client/referee/_early_stopping_referee.py +2 -2
  72. rapidata/rapidata_client/selection/_base_selection.py +6 -0
  73. rapidata/rapidata_client/selection/ab_test_selection.py +7 -3
  74. rapidata/rapidata_client/selection/capped_selection.py +2 -2
  75. rapidata/rapidata_client/selection/conditional_validation_selection.py +12 -6
  76. rapidata/rapidata_client/selection/demographic_selection.py +9 -6
  77. rapidata/rapidata_client/selection/rapidata_selections.py +11 -8
  78. rapidata/rapidata_client/selection/shuffling_selection.py +5 -5
  79. rapidata/rapidata_client/selection/static_selection.py +5 -10
  80. rapidata/rapidata_client/selection/validation_selection.py +9 -5
  81. rapidata/rapidata_client/settings/_rapidata_setting.py +8 -0
  82. rapidata/rapidata_client/settings/alert_on_fast_response.py +8 -5
  83. rapidata/rapidata_client/settings/allow_neither_both.py +1 -0
  84. rapidata/rapidata_client/settings/custom_setting.py +3 -2
  85. rapidata/rapidata_client/settings/free_text_minimum_characters.py +9 -4
  86. rapidata/rapidata_client/settings/models/translation_behaviour_options.py +3 -2
  87. rapidata/rapidata_client/settings/no_shuffle.py +4 -2
  88. rapidata/rapidata_client/settings/play_video_until_the_end.py +7 -4
  89. rapidata/rapidata_client/settings/rapidata_settings.py +4 -3
  90. rapidata/rapidata_client/settings/translation_behaviour.py +7 -5
  91. rapidata/rapidata_client/validation/rapidata_validation_set.py +23 -17
  92. rapidata/rapidata_client/validation/rapids/box.py +3 -1
  93. rapidata/rapidata_client/validation/rapids/rapids.py +7 -1
  94. rapidata/rapidata_client/validation/rapids/rapids_manager.py +174 -141
  95. rapidata/rapidata_client/validation/validation_set_manager.py +285 -268
  96. rapidata/rapidata_client/workflow/__init__.py +1 -1
  97. rapidata/rapidata_client/workflow/_base_workflow.py +6 -1
  98. rapidata/rapidata_client/workflow/_classify_workflow.py +6 -0
  99. rapidata/rapidata_client/workflow/_compare_workflow.py +6 -0
  100. rapidata/rapidata_client/workflow/_draw_workflow.py +6 -0
  101. rapidata/rapidata_client/workflow/_evaluation_workflow.py +6 -0
  102. rapidata/rapidata_client/workflow/_free_text_workflow.py +6 -0
  103. rapidata/rapidata_client/workflow/_locate_workflow.py +6 -0
  104. rapidata/rapidata_client/workflow/_ranking_workflow.py +12 -0
  105. rapidata/rapidata_client/workflow/_select_words_workflow.py +6 -0
  106. rapidata/rapidata_client/workflow/_timestamp_workflow.py +6 -0
  107. rapidata/service/__init__.py +1 -1
  108. rapidata/service/credential_manager.py +1 -1
  109. rapidata/service/local_file_service.py +9 -8
  110. rapidata/service/openapi_service.py +2 -2
  111. {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/METADATA +4 -1
  112. {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/RECORD +114 -107
  113. rapidata/rapidata_client/logging/__init__.py +0 -2
  114. rapidata/rapidata_client/logging/logger.py +0 -122
  115. rapidata/rapidata_client/logging/output_manager.py +0 -20
  116. {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/LICENSE +0 -0
  117. {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/WHEEL +0 -0
@@ -1,4 +1,4 @@
1
- from typing import Optional
1
+ from typing import Optional, cast
2
2
  from rapidata.rapidata_client.benchmark.rapidata_benchmark import RapidataBenchmark
3
3
  from rapidata.api_client.models.create_benchmark_model import CreateBenchmarkModel
4
4
  from rapidata.service.openapi_service import OpenAPIService
@@ -9,6 +9,7 @@ from rapidata.api_client.models.filter import Filter
9
9
  from rapidata.api_client.models.sort_criterion import SortCriterion
10
10
  from rapidata.api_client.models.sort_direction import SortDirection
11
11
  from rapidata.api_client.models.filter_operator import FilterOperator
12
+ from rapidata.rapidata_client.config import logger, tracer
12
13
 
13
14
 
14
15
  class RapidataBenchmarkManager:
@@ -29,7 +30,7 @@ class RapidataBenchmarkManager:
29
30
  def create_new_benchmark(
30
31
  self,
31
32
  name: str,
32
- identifiers: list[str],
33
+ identifiers: Optional[list[str]] = None,
33
34
  prompts: Optional[list[str | None]] = None,
34
35
  prompt_assets: Optional[list[str | None]] = None,
35
36
  tags: Optional[list[list[str] | None]] = None,
@@ -38,11 +39,11 @@ class RapidataBenchmarkManager:
38
39
  Creates a new benchmark with the given name, identifiers, prompts, and media assets.
39
40
  Everything is matched up by the indexes of the lists.
40
41
 
41
- prompts or prompt_assets must be provided.
42
+ prompts or identifiers must be provided, as well as prompts or prompt_assets.
42
43
 
43
44
  Args:
44
45
  name: The name of the benchmark.
45
- identifiers: The identifiers of the prompts/assets/tags that will be used to match up the media
46
+ identifiers: The identifiers of the prompts/assets/tags that will be used to match up the media. If not provided, it will use the prompts as the identifiers.
46
47
  prompts: The prompts that will be registered for the benchmark.
47
48
  prompt_assets: The prompt assets that will be registered for the benchmark.
48
49
  tags: The tags that will be associated with the prompts to use for filtering the leaderboard results. They will NOT be shown to the users.
@@ -58,78 +59,133 @@ class RapidataBenchmarkManager:
58
59
  benchmark = create_new_benchmark(name=name, identifiers=identifiers, prompts=prompts, prompt_assets=prompt_assets, tags=tags)
59
60
  ```
60
61
  """
61
- if not isinstance(name, str):
62
- raise ValueError("Name must be a string.")
63
-
64
- if prompts and (
65
- not isinstance(prompts, list)
66
- or not all(isinstance(prompt, str) or prompt is None for prompt in prompts)
62
+ with tracer.start_as_current_span(
63
+ "RapidataBenchmarkManager.create_new_benchmark"
67
64
  ):
68
- raise ValueError("Prompts must be a list of strings or None.")
65
+ if not isinstance(name, str):
66
+ raise ValueError("Name must be a string.")
67
+
68
+ if prompts and (
69
+ not isinstance(prompts, list)
70
+ or not all(
71
+ isinstance(prompt, str) or prompt is None for prompt in prompts
72
+ )
73
+ ):
74
+ raise ValueError("Prompts must be a list of strings or None.")
75
+
76
+ if prompt_assets and (
77
+ not isinstance(prompt_assets, list)
78
+ or not all(
79
+ isinstance(asset, str) or asset is None for asset in prompt_assets
80
+ )
81
+ ):
82
+ raise ValueError("Media assets must be a list of strings or None.")
83
+
84
+ if identifiers and (
85
+ not isinstance(identifiers, list)
86
+ or not all(isinstance(identifier, str) for identifier in identifiers)
87
+ ):
88
+ raise ValueError("Identifiers must be a list of strings.")
89
+
90
+ if identifiers:
91
+ if not len(set(identifiers)) == len(identifiers):
92
+ raise ValueError("Identifiers must be unique.")
93
+
94
+ if tags is not None:
95
+ if not isinstance(tags, list):
96
+ raise ValueError("Tags must be a list of lists of strings or None.")
97
+
98
+ for tag in tags:
99
+ if tag is not None and (
100
+ not isinstance(tag, list)
101
+ or not all(isinstance(item, str) for item in tag)
102
+ ):
103
+ raise ValueError(
104
+ "Tags must be a list of lists of strings or None."
105
+ )
69
106
 
70
- if prompt_assets and (
71
- not isinstance(prompt_assets, list)
72
- or not all(
73
- isinstance(asset, str) or asset is None for asset in prompt_assets
74
- )
75
- ):
76
- raise ValueError("Media assets must be a list of strings or None.")
107
+ if not identifiers and not prompts:
108
+ raise ValueError(
109
+ "At least one of identifiers or prompts must be provided."
110
+ )
111
+
112
+ if not prompts and not prompt_assets:
113
+ raise ValueError(
114
+ "At least one of prompts or media assets must be provided."
115
+ )
116
+
117
+ if not identifiers:
118
+ assert prompts is not None
119
+ if not len(set(prompts)) == len(prompts):
120
+ raise ValueError(
121
+ "Prompts must be unique. Otherwise use identifiers."
122
+ )
123
+ if any(prompt is None for prompt in prompts):
124
+ raise ValueError(
125
+ "Prompts must not be None. Otherwise use identifiers."
126
+ )
77
127
 
78
- if not isinstance(identifiers, list) or not all(
79
- isinstance(identifier, str) for identifier in identifiers
80
- ):
81
- raise ValueError("Identifiers must be a list of strings.")
128
+ identifiers = cast(list[str], prompts)
82
129
 
83
- if prompts and len(identifiers) != len(prompts):
84
- raise ValueError("Identifiers and prompts must have the same length.")
130
+ assert identifiers is not None
85
131
 
86
- if prompt_assets and len(identifiers) != len(prompt_assets):
87
- raise ValueError("Identifiers and media assets must have the same length.")
132
+ expected_length = len(identifiers)
88
133
 
89
- if not prompts and not prompt_assets:
90
- raise ValueError(
91
- "At least one of prompts or media assets must be provided."
92
- )
134
+ if not prompts:
135
+ prompts = cast(list[str | None], [None] * expected_length)
93
136
 
94
- if len(set(identifiers)) != len(identifiers):
95
- raise ValueError("Identifiers must be unique.")
137
+ if not prompt_assets:
138
+ prompt_assets = cast(list[str | None], [None] * expected_length)
96
139
 
97
- if tags and len(identifiers) != len(tags):
98
- raise ValueError("Identifiers and tags must have the same length.")
140
+ if not tags:
141
+ tags = cast(list[list[str] | None], [None] * expected_length)
99
142
 
100
- benchmark_result = self.__openapi_service.benchmark_api.benchmark_post(
101
- create_benchmark_model=CreateBenchmarkModel(
102
- name=name,
143
+ # At this point, all variables are guaranteed to be lists, not None
144
+ assert prompts is not None
145
+ assert prompt_assets is not None
146
+ assert tags is not None
147
+
148
+ if not (expected_length == len(prompts) == len(prompt_assets) == len(tags)):
149
+ raise ValueError(
150
+ "Identifiers, prompts, media assets, and tags must have the same length or set to None."
151
+ )
152
+
153
+ logger.info("Creating new benchmark %s", name)
154
+
155
+ benchmark_result = self.__openapi_service.benchmark_api.benchmark_post(
156
+ create_benchmark_model=CreateBenchmarkModel(
157
+ name=name,
158
+ )
103
159
  )
104
- )
105
160
 
106
- benchmark = RapidataBenchmark(name, benchmark_result.id, self.__openapi_service)
161
+ logger.info("Benchmark created with id %s", benchmark_result.id)
107
162
 
108
- prompts_list = prompts if prompts is not None else [None] * len(identifiers)
109
- media_assets_list = (
110
- prompt_assets if prompt_assets is not None else [None] * len(identifiers)
111
- )
112
- tags_list = tags if tags is not None else [None] * len(identifiers)
163
+ benchmark = RapidataBenchmark(
164
+ name, benchmark_result.id, self.__openapi_service
165
+ )
113
166
 
114
- for identifier, prompt, asset, tag in zip(
115
- identifiers, prompts_list, media_assets_list, tags_list
116
- ):
117
- benchmark.add_prompt(identifier, prompt, asset, tag)
167
+ for identifier, prompt, asset, tag in zip(
168
+ identifiers, prompts, prompt_assets, tags
169
+ ):
170
+ benchmark.add_prompt(identifier, prompt, asset, tag)
118
171
 
119
- return benchmark
172
+ return benchmark
120
173
 
121
174
  def get_benchmark_by_id(self, id: str) -> RapidataBenchmark:
122
175
  """
123
176
  Returns a benchmark by its ID.
124
177
  """
125
- benchmark_result = (
126
- self.__openapi_service.benchmark_api.benchmark_benchmark_id_get(
127
- benchmark_id=id
178
+ with tracer.start_as_current_span(
179
+ "RapidataBenchmarkManager.get_benchmark_by_id"
180
+ ):
181
+ benchmark_result = (
182
+ self.__openapi_service.benchmark_api.benchmark_benchmark_id_get(
183
+ benchmark_id=id
184
+ )
185
+ )
186
+ return RapidataBenchmark(
187
+ benchmark_result.name, benchmark_result.id, self.__openapi_service
128
188
  )
129
- )
130
- return RapidataBenchmark(
131
- benchmark_result.name, benchmark_result.id, self.__openapi_service
132
- )
133
189
 
134
190
  def find_benchmarks(
135
191
  self, name: str = "", amount: int = 10
@@ -137,24 +193,27 @@ class RapidataBenchmarkManager:
137
193
  """
138
194
  Returns a list of benchmarks by their name.
139
195
  """
140
- benchmark_result = self.__openapi_service.benchmark_api.benchmarks_get(
141
- QueryModel(
142
- page=PageInfo(index=1, size=amount),
143
- filter=RootFilter(
144
- filters=[
145
- Filter(
146
- field="Name", operator=FilterOperator.CONTAINS, value=name
196
+ with tracer.start_as_current_span("RapidataBenchmarkManager.find_benchmarks"):
197
+ benchmark_result = self.__openapi_service.benchmark_api.benchmarks_get(
198
+ QueryModel(
199
+ page=PageInfo(index=1, size=amount),
200
+ filter=RootFilter(
201
+ filters=[
202
+ Filter(
203
+ field="Name",
204
+ operator=FilterOperator.CONTAINS,
205
+ value=name,
206
+ )
207
+ ]
208
+ ),
209
+ sortCriteria=[
210
+ SortCriterion(
211
+ direction=SortDirection.DESC, propertyName="CreatedAt"
147
212
  )
148
- ]
149
- ),
150
- sortCriteria=[
151
- SortCriterion(
152
- direction=SortDirection.DESC, propertyName="CreatedAt"
153
- )
154
- ],
213
+ ],
214
+ )
155
215
  )
156
- )
157
- return [
158
- RapidataBenchmark(benchmark.name, benchmark.id, self.__openapi_service)
159
- for benchmark in benchmark_result.items
160
- ]
216
+ return [
217
+ RapidataBenchmark(benchmark.name, benchmark.id, self.__openapi_service)
218
+ for benchmark in benchmark_result.items
219
+ ]
@@ -1 +1,4 @@
1
+ from .logger import logger # has to be instantiated before rapidata_config
2
+ from .tracer import tracer # has to be instantiated before rapidata_config
1
3
  from .rapidata_config import rapidata_config
4
+ from .managed_print import managed_print
@@ -0,0 +1,135 @@
1
+ import logging
2
+ from typing import Protocol, runtime_checkable
3
+ from opentelemetry._logs import set_logger_provider
4
+ from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
5
+ from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
6
+ from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
7
+ from opentelemetry.sdk.resources import Resource
8
+ from rapidata import __version__
9
+ from .logging_config import LoggingConfig, register_config_handler
10
+
11
+
12
+ @runtime_checkable
13
+ class LoggerProtocol(Protocol):
14
+ """Protocol that defines the logger interface for type checking."""
15
+
16
+ def debug(self, msg: object, *args, **kwargs) -> None: ...
17
+ def info(self, msg: object, *args, **kwargs) -> None: ...
18
+ def warning(self, msg: object, *args, **kwargs) -> None: ...
19
+ def warn(self, msg: object, *args, **kwargs) -> None: ...
20
+ def error(self, msg: object, *args, **kwargs) -> None: ...
21
+ def exception(self, msg: object, *args, exc_info=True, **kwargs) -> None: ...
22
+ def critical(self, msg: object, *args, **kwargs) -> None: ...
23
+ def fatal(self, msg: object, *args, **kwargs) -> None: ...
24
+ def log(self, level: int, msg: object, *args, **kwargs) -> None: ...
25
+ def isEnabledFor(self, level: int) -> bool: ...
26
+ def getEffectiveLevel(self) -> int: ...
27
+ def setLevel(self, level: int | str) -> None: ...
28
+ def addHandler(self, handler: logging.Handler) -> None: ...
29
+ def removeHandler(self, handler: logging.Handler) -> None: ...
30
+ @property
31
+ def handlers(self) -> list[logging.Handler]: ...
32
+ @property
33
+ def level(self) -> int: ...
34
+ @property
35
+ def name(self) -> str: ...
36
+
37
+
38
+ class RapidataLogger:
39
+ """Logger implementation that updates when the configuration changes."""
40
+
41
+ def __init__(self, name: str = "rapidata"):
42
+ self._logger = logging.getLogger(name)
43
+ self._otlp_initialized = False
44
+ self._otlp_handler = None
45
+
46
+ # Register this logger to receive configuration updates
47
+ register_config_handler(self._handle_config_update)
48
+
49
+ def _handle_config_update(self, config: LoggingConfig) -> None:
50
+ """Handle configuration updates."""
51
+ self._update_logger(config)
52
+
53
+ def _update_logger(self, config: LoggingConfig) -> None:
54
+ """Update the logger based on the new configuration."""
55
+ # Initialize OTLP logging only once and only if not disabled
56
+ if not self._otlp_initialized and config.enable_otlp:
57
+ try:
58
+ logger_provider = LoggerProvider(
59
+ resource=Resource.create(
60
+ {
61
+ "service.name": "Rapidata.Python.SDK",
62
+ "service.version": __version__,
63
+ }
64
+ ),
65
+ )
66
+ set_logger_provider(logger_provider)
67
+
68
+ exporter = OTLPLogExporter(
69
+ endpoint="https://otlp-sdk.rapidata.ai/v1/logs",
70
+ timeout=30,
71
+ )
72
+
73
+ processor = BatchLogRecordProcessor(
74
+ exporter,
75
+ max_queue_size=2048,
76
+ export_timeout_millis=30000,
77
+ max_export_batch_size=512,
78
+ )
79
+
80
+ logger_provider.add_log_record_processor(processor)
81
+
82
+ # OTLP handler - captures DEBUG and above
83
+ self._otlp_handler = LoggingHandler(logger_provider=logger_provider)
84
+ self._otlp_handler.setLevel(logging.DEBUG) # OTLP gets everything
85
+
86
+ self._otlp_initialized = True
87
+
88
+ except Exception as e:
89
+ self._logger.warning(f"Failed to initialize OTLP logging: {e}")
90
+ import traceback
91
+
92
+ traceback.print_exc()
93
+
94
+ # Console handler with configurable level
95
+ console_handler = logging.StreamHandler()
96
+ console_level = getattr(logging, config.level.upper())
97
+ console_handler.setLevel(console_level)
98
+ console_formatter = logging.Formatter(config.format)
99
+ console_handler.setFormatter(console_formatter)
100
+
101
+ # Configure the logger
102
+ self._logger.setLevel(logging.DEBUG) # Logger must allow DEBUG for OTLP
103
+
104
+ # Remove any existing handlers (except OTLP when appropriate)
105
+ for handler in self._logger.handlers[:]:
106
+ if handler != self._otlp_handler:
107
+ self._logger.removeHandler(handler)
108
+ elif handler == self._otlp_handler and not config.enable_otlp:
109
+ self._logger.removeHandler(handler)
110
+
111
+ # Add OTLP handler if initialized and not disabled
112
+ if (
113
+ self._otlp_handler
114
+ and self._otlp_handler not in self._logger.handlers
115
+ and config.enable_otlp
116
+ ):
117
+ self._logger.addHandler(self._otlp_handler)
118
+
119
+ # Add console handler
120
+ self._logger.addHandler(console_handler)
121
+
122
+ # Add file handler if log_file is provided
123
+ if config.log_file:
124
+ file_handler = logging.FileHandler(config.log_file)
125
+ file_handler.setLevel(console_level) # Use same level as console
126
+ file_formatter = logging.Formatter(config.format)
127
+ file_handler.setFormatter(file_formatter)
128
+ self._logger.addHandler(file_handler)
129
+
130
+ def __getattr__(self, name: str) -> object:
131
+ """Delegate attribute access to the underlying logger."""
132
+ return getattr(self._logger, name)
133
+
134
+
135
+ logger: LoggerProtocol = RapidataLogger() # type: ignore[assignment]
@@ -0,0 +1,58 @@
1
+ from typing import Callable
2
+ from pydantic import BaseModel, Field
3
+
4
+ # Type alias for config update handlers
5
+ ConfigUpdateHandler = Callable[["LoggingConfig"], None]
6
+
7
+ # Global list to store registered handlers
8
+ _config_handlers: list[ConfigUpdateHandler] = []
9
+
10
+
11
+ def register_config_handler(handler: ConfigUpdateHandler) -> None:
12
+ """Register a handler to be called when the logging configuration updates."""
13
+ _config_handlers.append(handler)
14
+
15
+
16
+ def unregister_config_handler(handler: ConfigUpdateHandler) -> None:
17
+ """Unregister a previously registered handler."""
18
+ if handler in _config_handlers:
19
+ _config_handlers.remove(handler)
20
+
21
+
22
+ class LoggingConfig(BaseModel):
23
+ """
24
+ Holds the configuration for the logging process.
25
+
26
+ Attributes:
27
+ level (str): The logging level. Defaults to "WARNING".
28
+ log_file (str | None): The logging file. Defaults to None.
29
+ format (str): The logging format. Defaults to "%(asctime)s - %(name)s - %(levelname)s - %(message)s".
30
+ silent_mode (bool): Whether to disable the prints and progress bars. Does NOT affect the logging. Defaults to False.
31
+ enable_otlp (bool): Whether to enable OpenTelemetry trace logs. Defaults to True.
32
+ """
33
+
34
+ level: str = Field(default="WARNING")
35
+ log_file: str | None = Field(default=None)
36
+ format: str = Field(default="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
37
+ silent_mode: bool = Field(default=False)
38
+ enable_otlp: bool = Field(default=True)
39
+
40
+ def __init__(self, **kwargs):
41
+ super().__init__(**kwargs)
42
+ self._notify_handlers()
43
+
44
+ def __setattr__(self, name: str, value) -> None:
45
+ super().__setattr__(name, value)
46
+ self._notify_handlers()
47
+
48
+ def _notify_handlers(self) -> None:
49
+ """Notify all registered handlers that the configuration has updated."""
50
+ for handler in _config_handlers:
51
+ try:
52
+ handler(self)
53
+ except Exception as e:
54
+ # Log the error but don't let one handler failure break others
55
+ print(f"Warning: Config handler failed: {e}")
56
+
57
+
58
+ # Tracer is now handled in tracer.py with event-based updates
@@ -0,0 +1,6 @@
1
+ from rapidata.rapidata_client.config import rapidata_config
2
+
3
+
4
+ def managed_print(*args, **kwargs) -> None:
5
+ if not rapidata_config.logging.silent_mode:
6
+ print(*args, **kwargs)
@@ -0,0 +1,14 @@
1
+ from pydantic import BaseModel, Field
2
+
3
+
4
+ class OrderConfig(BaseModel):
5
+ """
6
+ Holds the configuration for the order process.
7
+
8
+ Attributes:
9
+ minOrderDatapointsForValidation (int): The minimum number of datapoints required so that an automatic validationset gets created if no recommended was found. Defaults to 50.
10
+ autoValidationSetSize (int): The maximum size of the auto-generated validation set. Defaults to 20.
11
+ """
12
+
13
+ minOrderDatapointsForValidation: int = Field(default=50)
14
+ autoValidationSetSize: int = Field(default=20)
@@ -1,5 +1,9 @@
1
1
  from pydantic import BaseModel, Field
2
2
 
3
+ from rapidata.rapidata_client.config.logging_config import LoggingConfig
4
+ from rapidata.rapidata_client.config.order_config import OrderConfig
5
+ from rapidata.rapidata_client.config.upload_config import UploadConfig
6
+
3
7
 
4
8
  class RapidataConfig(BaseModel):
5
9
  """
@@ -7,25 +11,26 @@ class RapidataConfig(BaseModel):
7
11
 
8
12
  To adjust the configurations used, you can modify the `rapidata_config` object.
9
13
 
10
- Args:
11
- maxUploadWorkers (int): The maximum number of worker threads for processing media paths. Defaults to 10.
12
- uploadMaxRetries (int): The maximum number of retries for failed uploads. Defaults to 3.
14
+ Attributes:
13
15
  enableBetaFeatures (bool): Whether to enable beta features. Defaults to False.
14
- minOrderDatapointsForValidation (int): The minimum number of datapoints required so that an automatic validationset gets created if no recommended was found. Defaults to 50.
15
- autoValidationSetSize (int): The maximum size of the auto-generated validation set. Defaults to 20.
16
+ upload (UploadConfig): The configuration for the upload process.
17
+ Such as the maximum number of worker threads for processing media paths and the maximum number of retries for failed uploads.
18
+ order (OrderConfig): The configuration for the order process.
19
+ Such as the minimum number of datapoints required so that an automatic validationset gets created if no recommended was found.
20
+ logging (LoggingConfig): The configuration for the logging process.
21
+ Such as the logging level and the logging file.
16
22
 
17
23
  Example:
18
24
  ```python
19
25
  from rapidata import rapidata_config
20
- rapidata_config.maxUploadWorkers = 20
26
+ rapidata_config.upload.maxUploadWorkers = 20
21
27
  ```
22
28
  """
23
29
 
24
- maxUploadWorkers: int = Field(default=10)
25
- uploadMaxRetries: int = Field(default=3)
26
30
  enableBetaFeatures: bool = False
27
- minOrderDatapointsForValidation: int = Field(default=50)
28
- autoValidationSetSize: int = Field(default=20)
31
+ upload: UploadConfig = Field(default_factory=UploadConfig)
32
+ order: OrderConfig = Field(default_factory=OrderConfig)
33
+ logging: LoggingConfig = Field(default_factory=LoggingConfig)
29
34
 
30
35
 
31
36
  rapidata_config = RapidataConfig()
@@ -0,0 +1,130 @@
1
+ from typing import Protocol, runtime_checkable, Any
2
+ from opentelemetry import trace
3
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
4
+ from opentelemetry.sdk.trace import TracerProvider
5
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
6
+ from opentelemetry.sdk.resources import Resource
7
+ from rapidata import __version__
8
+ from .logging_config import LoggingConfig, register_config_handler
9
+ from rapidata.rapidata_client.config import logger
10
+
11
+
12
+ @runtime_checkable
13
+ class TracerProtocol(Protocol):
14
+ """Protocol that defines the tracer interface for type checking."""
15
+
16
+ def start_span(self, name: str, *args, **kwargs) -> Any: ...
17
+ def start_as_current_span(self, name: str, *args, **kwargs) -> Any: ...
18
+
19
+
20
+ class NoOpSpan:
21
+ """A no-op span that does nothing when tracing is disabled."""
22
+
23
+ def __enter__(self):
24
+ return self
25
+
26
+ def __exit__(self, *args):
27
+ pass
28
+
29
+ def set_attribute(self, *args, **kwargs):
30
+ pass
31
+
32
+ def set_status(self, *args, **kwargs):
33
+ pass
34
+
35
+ def add_event(self, *args, **kwargs):
36
+ pass
37
+
38
+ def end(self, *args, **kwargs):
39
+ pass
40
+
41
+ def __getattr__(self, name: str) -> Any:
42
+ """Return self for any method call to maintain chainability."""
43
+ return lambda *args, **kwargs: self
44
+
45
+
46
+ class NoOpTracer:
47
+ """A no-op tracer that returns no-op spans when tracing is disabled."""
48
+
49
+ def start_span(self, name: str, *args, **kwargs) -> NoOpSpan:
50
+ return NoOpSpan()
51
+
52
+ def start_as_current_span(self, name: str, *args, **kwargs) -> NoOpSpan:
53
+ return NoOpSpan()
54
+
55
+ def __getattr__(self, name: str) -> Any:
56
+ """Delegate to no-op behavior."""
57
+ return lambda *args, **kwargs: NoOpSpan()
58
+
59
+
60
+ class RapidataTracer:
61
+ """Tracer implementation that updates when the configuration changes."""
62
+
63
+ def __init__(self, name: str = __name__):
64
+ self._name = name
65
+ self._otlp_initialized = False
66
+ self._tracer_provider = None
67
+ self._real_tracer = None
68
+ self._no_op_tracer = NoOpTracer()
69
+ self._enabled = True # Default to enabled
70
+
71
+ # Register this tracer to receive configuration updates
72
+ register_config_handler(self._handle_config_update)
73
+
74
+ def _handle_config_update(self, config: LoggingConfig) -> None:
75
+ """Handle configuration updates."""
76
+ self._update_tracer(config)
77
+
78
+ def _update_tracer(self, config: LoggingConfig) -> None:
79
+ """Update the tracer based on the new configuration."""
80
+ self._enabled = config.enable_otlp
81
+
82
+ # Initialize OTLP tracing only once and only if not disabled
83
+ if not self._otlp_initialized and config.enable_otlp:
84
+ try:
85
+ resource = Resource.create(
86
+ {
87
+ "service.name": "Rapidata.Python.SDK",
88
+ "service.version": __version__,
89
+ }
90
+ )
91
+
92
+ self._tracer_provider = TracerProvider(resource=resource)
93
+ trace.set_tracer_provider(self._tracer_provider)
94
+
95
+ exporter = OTLPSpanExporter(
96
+ endpoint="https://otlp-sdk.rapidata.ai/v1/traces",
97
+ timeout=30,
98
+ )
99
+
100
+ span_processor = BatchSpanProcessor(exporter)
101
+ self._tracer_provider.add_span_processor(span_processor)
102
+
103
+ self._real_tracer = trace.get_tracer(self._name)
104
+ self._otlp_initialized = True
105
+
106
+ except Exception as e:
107
+ logger.warning(f"Failed to initialize tracing: {e}")
108
+ self._enabled = False
109
+
110
+ def start_span(self, name: str, *args, **kwargs) -> Any:
111
+ """Start a span, or return a no-op span if tracing is disabled."""
112
+ if self._enabled and self._real_tracer:
113
+ return self._real_tracer.start_span(name, *args, **kwargs)
114
+ return self._no_op_tracer.start_span(name, *args, **kwargs)
115
+
116
+ def start_as_current_span(self, name: str, *args, **kwargs) -> Any:
117
+ """Start a span as current, or return a no-op span if tracing is disabled."""
118
+ if self._enabled and self._real_tracer:
119
+ return self._real_tracer.start_as_current_span(name, *args, **kwargs)
120
+ return self._no_op_tracer.start_as_current_span(name, *args, **kwargs)
121
+
122
+ def __getattr__(self, name: str) -> Any:
123
+ """Delegate attribute access to the appropriate tracer."""
124
+ if self._enabled and self._real_tracer:
125
+ return getattr(self._real_tracer, name)
126
+ return getattr(self._no_op_tracer, name)
127
+
128
+
129
+ # Create the main tracer instance - type checkers will see it as TracerProtocol
130
+ tracer: TracerProtocol = RapidataTracer() # type: ignore[assignment]