rapidata-2.37.0-py3-none-any.whl → rapidata-2.39.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
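This version of rapidata might be problematic. More details are available on the registry page for this release (the "Click here" link from the original page is not preserved in this text).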
- rapidata/__init__.py +3 -4
- rapidata/api_client/__init__.py +4 -5
- rapidata/api_client/api/benchmark_api.py +289 -3
- rapidata/api_client/api/leaderboard_api.py +35 -1
- rapidata/api_client/api/participant_api.py +289 -3
- rapidata/api_client/api/validation_set_api.py +119 -400
- rapidata/api_client/models/__init__.py +4 -5
- rapidata/api_client/models/ab_test_selection_a_inner.py +1 -1
- rapidata/api_client/models/compare_workflow_model1.py +1 -8
- rapidata/api_client/models/conditional_validation_selection.py +4 -9
- rapidata/api_client/models/confidence_interval.py +98 -0
- rapidata/api_client/models/create_simple_pipeline_model_pipeline_steps_inner.py +8 -22
- rapidata/api_client/models/get_standing_by_id_result.py +7 -2
- rapidata/api_client/models/get_validation_set_by_id_result.py +4 -2
- rapidata/api_client/models/simple_workflow_model1.py +1 -8
- rapidata/api_client/models/standing_by_leaderboard.py +10 -4
- rapidata/api_client/models/update_benchmark_model.py +87 -0
- rapidata/api_client/models/update_participant_model.py +87 -0
- rapidata/api_client/models/update_validation_set_model.py +93 -0
- rapidata/api_client/models/validation_chance.py +20 -3
- rapidata/api_client/models/validation_set_model.py +5 -42
- rapidata/api_client_README.md +7 -7
- rapidata/rapidata_client/__init__.py +1 -4
- rapidata/rapidata_client/api/{rapidata_exception.py → rapidata_api_client.py} +119 -2
- rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py +88 -46
- rapidata/rapidata_client/benchmark/participant/_participant.py +26 -9
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py +310 -210
- rapidata/rapidata_client/benchmark/rapidata_benchmark_manager.py +134 -75
- rapidata/rapidata_client/config/__init__.py +3 -0
- rapidata/rapidata_client/config/logger.py +135 -0
- rapidata/rapidata_client/config/logging_config.py +58 -0
- rapidata/rapidata_client/config/managed_print.py +6 -0
- rapidata/rapidata_client/config/order_config.py +14 -0
- rapidata/rapidata_client/config/rapidata_config.py +15 -10
- rapidata/rapidata_client/config/tracer.py +130 -0
- rapidata/rapidata_client/config/upload_config.py +14 -0
- rapidata/rapidata_client/datapoints/_datapoint.py +1 -1
- rapidata/rapidata_client/datapoints/assets/__init__.py +1 -0
- rapidata/rapidata_client/datapoints/assets/_base_asset.py +2 -0
- rapidata/rapidata_client/datapoints/assets/_media_asset.py +1 -1
- rapidata/rapidata_client/datapoints/assets/_sessions.py +2 -2
- rapidata/rapidata_client/datapoints/assets/_text_asset.py +2 -2
- rapidata/rapidata_client/datapoints/assets/data_type_enum.py +1 -1
- rapidata/rapidata_client/datapoints/metadata/_media_asset_metadata.py +9 -8
- rapidata/rapidata_client/datapoints/metadata/_prompt_metadata.py +1 -2
- rapidata/rapidata_client/demographic/demographic_manager.py +16 -14
- rapidata/rapidata_client/filter/_base_filter.py +11 -5
- rapidata/rapidata_client/filter/age_filter.py +9 -3
- rapidata/rapidata_client/filter/and_filter.py +20 -5
- rapidata/rapidata_client/filter/campaign_filter.py +7 -1
- rapidata/rapidata_client/filter/country_filter.py +8 -2
- rapidata/rapidata_client/filter/custom_filter.py +9 -3
- rapidata/rapidata_client/filter/gender_filter.py +9 -3
- rapidata/rapidata_client/filter/language_filter.py +12 -5
- rapidata/rapidata_client/filter/models/age_group.py +4 -4
- rapidata/rapidata_client/filter/models/gender.py +4 -2
- rapidata/rapidata_client/filter/new_user_filter.py +3 -4
- rapidata/rapidata_client/filter/not_filter.py +17 -5
- rapidata/rapidata_client/filter/or_filter.py +20 -5
- rapidata/rapidata_client/filter/rapidata_filters.py +12 -9
- rapidata/rapidata_client/filter/response_count_filter.py +6 -0
- rapidata/rapidata_client/filter/user_score_filter.py +17 -5
- rapidata/rapidata_client/order/_rapidata_dataset.py +45 -17
- rapidata/rapidata_client/order/_rapidata_order_builder.py +19 -13
- rapidata/rapidata_client/order/rapidata_order.py +60 -48
- rapidata/rapidata_client/order/rapidata_order_manager.py +231 -197
- rapidata/rapidata_client/order/rapidata_results.py +71 -57
- rapidata/rapidata_client/rapidata_client.py +36 -23
- rapidata/rapidata_client/referee/__init__.py +1 -1
- rapidata/rapidata_client/referee/_base_referee.py +3 -1
- rapidata/rapidata_client/referee/_early_stopping_referee.py +2 -2
- rapidata/rapidata_client/selection/_base_selection.py +6 -0
- rapidata/rapidata_client/selection/ab_test_selection.py +7 -3
- rapidata/rapidata_client/selection/capped_selection.py +2 -2
- rapidata/rapidata_client/selection/conditional_validation_selection.py +12 -6
- rapidata/rapidata_client/selection/demographic_selection.py +9 -6
- rapidata/rapidata_client/selection/rapidata_selections.py +11 -8
- rapidata/rapidata_client/selection/shuffling_selection.py +5 -5
- rapidata/rapidata_client/selection/static_selection.py +5 -10
- rapidata/rapidata_client/selection/validation_selection.py +9 -5
- rapidata/rapidata_client/settings/_rapidata_setting.py +8 -0
- rapidata/rapidata_client/settings/alert_on_fast_response.py +8 -5
- rapidata/rapidata_client/settings/allow_neither_both.py +1 -0
- rapidata/rapidata_client/settings/custom_setting.py +3 -2
- rapidata/rapidata_client/settings/free_text_minimum_characters.py +9 -4
- rapidata/rapidata_client/settings/models/translation_behaviour_options.py +3 -2
- rapidata/rapidata_client/settings/no_shuffle.py +4 -2
- rapidata/rapidata_client/settings/play_video_until_the_end.py +7 -4
- rapidata/rapidata_client/settings/rapidata_settings.py +4 -3
- rapidata/rapidata_client/settings/translation_behaviour.py +7 -5
- rapidata/rapidata_client/validation/rapidata_validation_set.py +23 -17
- rapidata/rapidata_client/validation/rapids/box.py +3 -1
- rapidata/rapidata_client/validation/rapids/rapids.py +7 -1
- rapidata/rapidata_client/validation/rapids/rapids_manager.py +174 -141
- rapidata/rapidata_client/validation/validation_set_manager.py +285 -268
- rapidata/rapidata_client/workflow/__init__.py +1 -1
- rapidata/rapidata_client/workflow/_base_workflow.py +6 -1
- rapidata/rapidata_client/workflow/_classify_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_compare_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_draw_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_evaluation_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_free_text_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_locate_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_ranking_workflow.py +12 -0
- rapidata/rapidata_client/workflow/_select_words_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_timestamp_workflow.py +6 -0
- rapidata/service/__init__.py +1 -1
- rapidata/service/credential_manager.py +1 -1
- rapidata/service/local_file_service.py +9 -8
- rapidata/service/openapi_service.py +2 -2
- {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/METADATA +4 -1
- {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/RECORD +114 -107
- rapidata/rapidata_client/logging/__init__.py +0 -2
- rapidata/rapidata_client/logging/logger.py +0 -122
- rapidata/rapidata_client/logging/output_manager.py +0 -20
- {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/LICENSE +0 -0
- {rapidata-2.37.0.dist-info → rapidata-2.39.0.dist-info}/WHEEL +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Optional
|
|
1
|
+
from typing import Optional, cast
|
|
2
2
|
from rapidata.rapidata_client.benchmark.rapidata_benchmark import RapidataBenchmark
|
|
3
3
|
from rapidata.api_client.models.create_benchmark_model import CreateBenchmarkModel
|
|
4
4
|
from rapidata.service.openapi_service import OpenAPIService
|
|
@@ -9,6 +9,7 @@ from rapidata.api_client.models.filter import Filter
|
|
|
9
9
|
from rapidata.api_client.models.sort_criterion import SortCriterion
|
|
10
10
|
from rapidata.api_client.models.sort_direction import SortDirection
|
|
11
11
|
from rapidata.api_client.models.filter_operator import FilterOperator
|
|
12
|
+
from rapidata.rapidata_client.config import logger, tracer
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class RapidataBenchmarkManager:
|
|
@@ -29,7 +30,7 @@ class RapidataBenchmarkManager:
|
|
|
29
30
|
def create_new_benchmark(
|
|
30
31
|
self,
|
|
31
32
|
name: str,
|
|
32
|
-
identifiers: list[str],
|
|
33
|
+
identifiers: Optional[list[str]] = None,
|
|
33
34
|
prompts: Optional[list[str | None]] = None,
|
|
34
35
|
prompt_assets: Optional[list[str | None]] = None,
|
|
35
36
|
tags: Optional[list[list[str] | None]] = None,
|
|
@@ -38,11 +39,11 @@ class RapidataBenchmarkManager:
|
|
|
38
39
|
Creates a new benchmark with the given name, identifiers, prompts, and media assets.
|
|
39
40
|
Everything is matched up by the indexes of the lists.
|
|
40
41
|
|
|
41
|
-
prompts or
|
|
42
|
+
prompts or identifiers must be provided, as well as prompts or prompt_assets.
|
|
42
43
|
|
|
43
44
|
Args:
|
|
44
45
|
name: The name of the benchmark.
|
|
45
|
-
identifiers: The identifiers of the prompts/assets/tags that will be used to match up the media
|
|
46
|
+
identifiers: The identifiers of the prompts/assets/tags that will be used to match up the media. If not provided, it will use the prompts as the identifiers.
|
|
46
47
|
prompts: The prompts that will be registered for the benchmark.
|
|
47
48
|
prompt_assets: The prompt assets that will be registered for the benchmark.
|
|
48
49
|
tags: The tags that will be associated with the prompts to use for filtering the leaderboard results. They will NOT be shown to the users.
|
|
@@ -58,78 +59,133 @@ class RapidataBenchmarkManager:
|
|
|
58
59
|
benchmark = create_new_benchmark(name=name, identifiers=identifiers, prompts=prompts, prompt_assets=prompt_assets, tags=tags)
|
|
59
60
|
```
|
|
60
61
|
"""
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
if prompts and (
|
|
65
|
-
not isinstance(prompts, list)
|
|
66
|
-
or not all(isinstance(prompt, str) or prompt is None for prompt in prompts)
|
|
62
|
+
with tracer.start_as_current_span(
|
|
63
|
+
"RapidataBenchmarkManager.create_new_benchmark"
|
|
67
64
|
):
|
|
68
|
-
|
|
65
|
+
if not isinstance(name, str):
|
|
66
|
+
raise ValueError("Name must be a string.")
|
|
67
|
+
|
|
68
|
+
if prompts and (
|
|
69
|
+
not isinstance(prompts, list)
|
|
70
|
+
or not all(
|
|
71
|
+
isinstance(prompt, str) or prompt is None for prompt in prompts
|
|
72
|
+
)
|
|
73
|
+
):
|
|
74
|
+
raise ValueError("Prompts must be a list of strings or None.")
|
|
75
|
+
|
|
76
|
+
if prompt_assets and (
|
|
77
|
+
not isinstance(prompt_assets, list)
|
|
78
|
+
or not all(
|
|
79
|
+
isinstance(asset, str) or asset is None for asset in prompt_assets
|
|
80
|
+
)
|
|
81
|
+
):
|
|
82
|
+
raise ValueError("Media assets must be a list of strings or None.")
|
|
83
|
+
|
|
84
|
+
if identifiers and (
|
|
85
|
+
not isinstance(identifiers, list)
|
|
86
|
+
or not all(isinstance(identifier, str) for identifier in identifiers)
|
|
87
|
+
):
|
|
88
|
+
raise ValueError("Identifiers must be a list of strings.")
|
|
89
|
+
|
|
90
|
+
if identifiers:
|
|
91
|
+
if not len(set(identifiers)) == len(identifiers):
|
|
92
|
+
raise ValueError("Identifiers must be unique.")
|
|
93
|
+
|
|
94
|
+
if tags is not None:
|
|
95
|
+
if not isinstance(tags, list):
|
|
96
|
+
raise ValueError("Tags must be a list of lists of strings or None.")
|
|
97
|
+
|
|
98
|
+
for tag in tags:
|
|
99
|
+
if tag is not None and (
|
|
100
|
+
not isinstance(tag, list)
|
|
101
|
+
or not all(isinstance(item, str) for item in tag)
|
|
102
|
+
):
|
|
103
|
+
raise ValueError(
|
|
104
|
+
"Tags must be a list of lists of strings or None."
|
|
105
|
+
)
|
|
69
106
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
107
|
+
if not identifiers and not prompts:
|
|
108
|
+
raise ValueError(
|
|
109
|
+
"At least one of identifiers or prompts must be provided."
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
if not prompts and not prompt_assets:
|
|
113
|
+
raise ValueError(
|
|
114
|
+
"At least one of prompts or media assets must be provided."
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
if not identifiers:
|
|
118
|
+
assert prompts is not None
|
|
119
|
+
if not len(set(prompts)) == len(prompts):
|
|
120
|
+
raise ValueError(
|
|
121
|
+
"Prompts must be unique. Otherwise use identifiers."
|
|
122
|
+
)
|
|
123
|
+
if any(prompt is None for prompt in prompts):
|
|
124
|
+
raise ValueError(
|
|
125
|
+
"Prompts must not be None. Otherwise use identifiers."
|
|
126
|
+
)
|
|
77
127
|
|
|
78
|
-
|
|
79
|
-
isinstance(identifier, str) for identifier in identifiers
|
|
80
|
-
):
|
|
81
|
-
raise ValueError("Identifiers must be a list of strings.")
|
|
128
|
+
identifiers = cast(list[str], prompts)
|
|
82
129
|
|
|
83
|
-
|
|
84
|
-
raise ValueError("Identifiers and prompts must have the same length.")
|
|
130
|
+
assert identifiers is not None
|
|
85
131
|
|
|
86
|
-
|
|
87
|
-
raise ValueError("Identifiers and media assets must have the same length.")
|
|
132
|
+
expected_length = len(identifiers)
|
|
88
133
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
"At least one of prompts or media assets must be provided."
|
|
92
|
-
)
|
|
134
|
+
if not prompts:
|
|
135
|
+
prompts = cast(list[str | None], [None] * expected_length)
|
|
93
136
|
|
|
94
|
-
|
|
95
|
-
|
|
137
|
+
if not prompt_assets:
|
|
138
|
+
prompt_assets = cast(list[str | None], [None] * expected_length)
|
|
96
139
|
|
|
97
|
-
|
|
98
|
-
|
|
140
|
+
if not tags:
|
|
141
|
+
tags = cast(list[list[str] | None], [None] * expected_length)
|
|
99
142
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
143
|
+
# At this point, all variables are guaranteed to be lists, not None
|
|
144
|
+
assert prompts is not None
|
|
145
|
+
assert prompt_assets is not None
|
|
146
|
+
assert tags is not None
|
|
147
|
+
|
|
148
|
+
if not (expected_length == len(prompts) == len(prompt_assets) == len(tags)):
|
|
149
|
+
raise ValueError(
|
|
150
|
+
"Identifiers, prompts, media assets, and tags must have the same length or set to None."
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
logger.info("Creating new benchmark %s", name)
|
|
154
|
+
|
|
155
|
+
benchmark_result = self.__openapi_service.benchmark_api.benchmark_post(
|
|
156
|
+
create_benchmark_model=CreateBenchmarkModel(
|
|
157
|
+
name=name,
|
|
158
|
+
)
|
|
103
159
|
)
|
|
104
|
-
)
|
|
105
160
|
|
|
106
|
-
|
|
161
|
+
logger.info("Benchmark created with id %s", benchmark_result.id)
|
|
107
162
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
)
|
|
112
|
-
tags_list = tags if tags is not None else [None] * len(identifiers)
|
|
163
|
+
benchmark = RapidataBenchmark(
|
|
164
|
+
name, benchmark_result.id, self.__openapi_service
|
|
165
|
+
)
|
|
113
166
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
167
|
+
for identifier, prompt, asset, tag in zip(
|
|
168
|
+
identifiers, prompts, prompt_assets, tags
|
|
169
|
+
):
|
|
170
|
+
benchmark.add_prompt(identifier, prompt, asset, tag)
|
|
118
171
|
|
|
119
|
-
|
|
172
|
+
return benchmark
|
|
120
173
|
|
|
121
174
|
def get_benchmark_by_id(self, id: str) -> RapidataBenchmark:
|
|
122
175
|
"""
|
|
123
176
|
Returns a benchmark by its ID.
|
|
124
177
|
"""
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
178
|
+
with tracer.start_as_current_span(
|
|
179
|
+
"RapidataBenchmarkManager.get_benchmark_by_id"
|
|
180
|
+
):
|
|
181
|
+
benchmark_result = (
|
|
182
|
+
self.__openapi_service.benchmark_api.benchmark_benchmark_id_get(
|
|
183
|
+
benchmark_id=id
|
|
184
|
+
)
|
|
185
|
+
)
|
|
186
|
+
return RapidataBenchmark(
|
|
187
|
+
benchmark_result.name, benchmark_result.id, self.__openapi_service
|
|
128
188
|
)
|
|
129
|
-
)
|
|
130
|
-
return RapidataBenchmark(
|
|
131
|
-
benchmark_result.name, benchmark_result.id, self.__openapi_service
|
|
132
|
-
)
|
|
133
189
|
|
|
134
190
|
def find_benchmarks(
|
|
135
191
|
self, name: str = "", amount: int = 10
|
|
@@ -137,24 +193,27 @@ class RapidataBenchmarkManager:
|
|
|
137
193
|
"""
|
|
138
194
|
Returns a list of benchmarks by their name.
|
|
139
195
|
"""
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
196
|
+
with tracer.start_as_current_span("RapidataBenchmarkManager.find_benchmarks"):
|
|
197
|
+
benchmark_result = self.__openapi_service.benchmark_api.benchmarks_get(
|
|
198
|
+
QueryModel(
|
|
199
|
+
page=PageInfo(index=1, size=amount),
|
|
200
|
+
filter=RootFilter(
|
|
201
|
+
filters=[
|
|
202
|
+
Filter(
|
|
203
|
+
field="Name",
|
|
204
|
+
operator=FilterOperator.CONTAINS,
|
|
205
|
+
value=name,
|
|
206
|
+
)
|
|
207
|
+
]
|
|
208
|
+
),
|
|
209
|
+
sortCriteria=[
|
|
210
|
+
SortCriterion(
|
|
211
|
+
direction=SortDirection.DESC, propertyName="CreatedAt"
|
|
147
212
|
)
|
|
148
|
-
]
|
|
149
|
-
)
|
|
150
|
-
sortCriteria=[
|
|
151
|
-
SortCriterion(
|
|
152
|
-
direction=SortDirection.DESC, propertyName="CreatedAt"
|
|
153
|
-
)
|
|
154
|
-
],
|
|
213
|
+
],
|
|
214
|
+
)
|
|
155
215
|
)
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
]
|
|
216
|
+
return [
|
|
217
|
+
RapidataBenchmark(benchmark.name, benchmark.id, self.__openapi_service)
|
|
218
|
+
for benchmark in benchmark_result.items
|
|
219
|
+
]
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Protocol, runtime_checkable
|
|
3
|
+
from opentelemetry._logs import set_logger_provider
|
|
4
|
+
from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
|
|
5
|
+
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
|
|
6
|
+
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
|
|
7
|
+
from opentelemetry.sdk.resources import Resource
|
|
8
|
+
from rapidata import __version__
|
|
9
|
+
from .logging_config import LoggingConfig, register_config_handler
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@runtime_checkable
|
|
13
|
+
class LoggerProtocol(Protocol):
|
|
14
|
+
"""Protocol that defines the logger interface for type checking."""
|
|
15
|
+
|
|
16
|
+
def debug(self, msg: object, *args, **kwargs) -> None: ...
|
|
17
|
+
def info(self, msg: object, *args, **kwargs) -> None: ...
|
|
18
|
+
def warning(self, msg: object, *args, **kwargs) -> None: ...
|
|
19
|
+
def warn(self, msg: object, *args, **kwargs) -> None: ...
|
|
20
|
+
def error(self, msg: object, *args, **kwargs) -> None: ...
|
|
21
|
+
def exception(self, msg: object, *args, exc_info=True, **kwargs) -> None: ...
|
|
22
|
+
def critical(self, msg: object, *args, **kwargs) -> None: ...
|
|
23
|
+
def fatal(self, msg: object, *args, **kwargs) -> None: ...
|
|
24
|
+
def log(self, level: int, msg: object, *args, **kwargs) -> None: ...
|
|
25
|
+
def isEnabledFor(self, level: int) -> bool: ...
|
|
26
|
+
def getEffectiveLevel(self) -> int: ...
|
|
27
|
+
def setLevel(self, level: int | str) -> None: ...
|
|
28
|
+
def addHandler(self, handler: logging.Handler) -> None: ...
|
|
29
|
+
def removeHandler(self, handler: logging.Handler) -> None: ...
|
|
30
|
+
@property
|
|
31
|
+
def handlers(self) -> list[logging.Handler]: ...
|
|
32
|
+
@property
|
|
33
|
+
def level(self) -> int: ...
|
|
34
|
+
@property
|
|
35
|
+
def name(self) -> str: ...
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class RapidataLogger:
|
|
39
|
+
"""Logger implementation that updates when the configuration changes."""
|
|
40
|
+
|
|
41
|
+
def __init__(self, name: str = "rapidata"):
|
|
42
|
+
self._logger = logging.getLogger(name)
|
|
43
|
+
self._otlp_initialized = False
|
|
44
|
+
self._otlp_handler = None
|
|
45
|
+
|
|
46
|
+
# Register this logger to receive configuration updates
|
|
47
|
+
register_config_handler(self._handle_config_update)
|
|
48
|
+
|
|
49
|
+
def _handle_config_update(self, config: LoggingConfig) -> None:
|
|
50
|
+
"""Handle configuration updates."""
|
|
51
|
+
self._update_logger(config)
|
|
52
|
+
|
|
53
|
+
def _update_logger(self, config: LoggingConfig) -> None:
|
|
54
|
+
"""Update the logger based on the new configuration."""
|
|
55
|
+
# Initialize OTLP logging only once and only if not disabled
|
|
56
|
+
if not self._otlp_initialized and config.enable_otlp:
|
|
57
|
+
try:
|
|
58
|
+
logger_provider = LoggerProvider(
|
|
59
|
+
resource=Resource.create(
|
|
60
|
+
{
|
|
61
|
+
"service.name": "Rapidata.Python.SDK",
|
|
62
|
+
"service.version": __version__,
|
|
63
|
+
}
|
|
64
|
+
),
|
|
65
|
+
)
|
|
66
|
+
set_logger_provider(logger_provider)
|
|
67
|
+
|
|
68
|
+
exporter = OTLPLogExporter(
|
|
69
|
+
endpoint="https://otlp-sdk.rapidata.ai/v1/logs",
|
|
70
|
+
timeout=30,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
processor = BatchLogRecordProcessor(
|
|
74
|
+
exporter,
|
|
75
|
+
max_queue_size=2048,
|
|
76
|
+
export_timeout_millis=30000,
|
|
77
|
+
max_export_batch_size=512,
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
logger_provider.add_log_record_processor(processor)
|
|
81
|
+
|
|
82
|
+
# OTLP handler - captures DEBUG and above
|
|
83
|
+
self._otlp_handler = LoggingHandler(logger_provider=logger_provider)
|
|
84
|
+
self._otlp_handler.setLevel(logging.DEBUG) # OTLP gets everything
|
|
85
|
+
|
|
86
|
+
self._otlp_initialized = True
|
|
87
|
+
|
|
88
|
+
except Exception as e:
|
|
89
|
+
self._logger.warning(f"Failed to initialize OTLP logging: {e}")
|
|
90
|
+
import traceback
|
|
91
|
+
|
|
92
|
+
traceback.print_exc()
|
|
93
|
+
|
|
94
|
+
# Console handler with configurable level
|
|
95
|
+
console_handler = logging.StreamHandler()
|
|
96
|
+
console_level = getattr(logging, config.level.upper())
|
|
97
|
+
console_handler.setLevel(console_level)
|
|
98
|
+
console_formatter = logging.Formatter(config.format)
|
|
99
|
+
console_handler.setFormatter(console_formatter)
|
|
100
|
+
|
|
101
|
+
# Configure the logger
|
|
102
|
+
self._logger.setLevel(logging.DEBUG) # Logger must allow DEBUG for OTLP
|
|
103
|
+
|
|
104
|
+
# Remove any existing handlers (except OTLP when appropriate)
|
|
105
|
+
for handler in self._logger.handlers[:]:
|
|
106
|
+
if handler != self._otlp_handler:
|
|
107
|
+
self._logger.removeHandler(handler)
|
|
108
|
+
elif handler == self._otlp_handler and not config.enable_otlp:
|
|
109
|
+
self._logger.removeHandler(handler)
|
|
110
|
+
|
|
111
|
+
# Add OTLP handler if initialized and not disabled
|
|
112
|
+
if (
|
|
113
|
+
self._otlp_handler
|
|
114
|
+
and self._otlp_handler not in self._logger.handlers
|
|
115
|
+
and config.enable_otlp
|
|
116
|
+
):
|
|
117
|
+
self._logger.addHandler(self._otlp_handler)
|
|
118
|
+
|
|
119
|
+
# Add console handler
|
|
120
|
+
self._logger.addHandler(console_handler)
|
|
121
|
+
|
|
122
|
+
# Add file handler if log_file is provided
|
|
123
|
+
if config.log_file:
|
|
124
|
+
file_handler = logging.FileHandler(config.log_file)
|
|
125
|
+
file_handler.setLevel(console_level) # Use same level as console
|
|
126
|
+
file_formatter = logging.Formatter(config.format)
|
|
127
|
+
file_handler.setFormatter(file_formatter)
|
|
128
|
+
self._logger.addHandler(file_handler)
|
|
129
|
+
|
|
130
|
+
def __getattr__(self, name: str) -> object:
|
|
131
|
+
"""Delegate attribute access to the underlying logger."""
|
|
132
|
+
return getattr(self._logger, name)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
logger: LoggerProtocol = RapidataLogger() # type: ignore[assignment]
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from typing import Callable
|
|
2
|
+
from pydantic import BaseModel, Field
|
|
3
|
+
|
|
4
|
+
# Type alias for config update handlers
|
|
5
|
+
ConfigUpdateHandler = Callable[["LoggingConfig"], None]
|
|
6
|
+
|
|
7
|
+
# Global list to store registered handlers
|
|
8
|
+
_config_handlers: list[ConfigUpdateHandler] = []
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def register_config_handler(handler: ConfigUpdateHandler) -> None:
|
|
12
|
+
"""Register a handler to be called when the logging configuration updates."""
|
|
13
|
+
_config_handlers.append(handler)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def unregister_config_handler(handler: ConfigUpdateHandler) -> None:
|
|
17
|
+
"""Unregister a previously registered handler."""
|
|
18
|
+
if handler in _config_handlers:
|
|
19
|
+
_config_handlers.remove(handler)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class LoggingConfig(BaseModel):
|
|
23
|
+
"""
|
|
24
|
+
Holds the configuration for the logging process.
|
|
25
|
+
|
|
26
|
+
Attributes:
|
|
27
|
+
level (str): The logging level. Defaults to "WARNING".
|
|
28
|
+
log_file (str | None): The logging file. Defaults to None.
|
|
29
|
+
format (str): The logging format. Defaults to "%(asctime)s - %(name)s - %(levelname)s - %(message)s".
|
|
30
|
+
silent_mode (bool): Whether to disable the prints and progress bars. Does NOT affect the logging. Defaults to False.
|
|
31
|
+
enable_otlp (bool): Whether to enable OpenTelemetry trace logs. Defaults to True.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
level: str = Field(default="WARNING")
|
|
35
|
+
log_file: str | None = Field(default=None)
|
|
36
|
+
format: str = Field(default="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
|
37
|
+
silent_mode: bool = Field(default=False)
|
|
38
|
+
enable_otlp: bool = Field(default=True)
|
|
39
|
+
|
|
40
|
+
def __init__(self, **kwargs):
|
|
41
|
+
super().__init__(**kwargs)
|
|
42
|
+
self._notify_handlers()
|
|
43
|
+
|
|
44
|
+
def __setattr__(self, name: str, value) -> None:
|
|
45
|
+
super().__setattr__(name, value)
|
|
46
|
+
self._notify_handlers()
|
|
47
|
+
|
|
48
|
+
def _notify_handlers(self) -> None:
|
|
49
|
+
"""Notify all registered handlers that the configuration has updated."""
|
|
50
|
+
for handler in _config_handlers:
|
|
51
|
+
try:
|
|
52
|
+
handler(self)
|
|
53
|
+
except Exception as e:
|
|
54
|
+
# Log the error but don't let one handler failure break others
|
|
55
|
+
print(f"Warning: Config handler failed: {e}")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Tracer is now handled in tracer.py with event-based updates
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from pydantic import BaseModel, Field
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class OrderConfig(BaseModel):
|
|
5
|
+
"""
|
|
6
|
+
Holds the configuration for the order process.
|
|
7
|
+
|
|
8
|
+
Attributes:
|
|
9
|
+
minOrderDatapointsForValidation (int): The minimum number of datapoints required so that an automatic validationset gets created if no recommended was found. Defaults to 50.
|
|
10
|
+
autoValidationSetSize (int): The maximum size of the auto-generated validation set. Defaults to 20.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
minOrderDatapointsForValidation: int = Field(default=50)
|
|
14
|
+
autoValidationSetSize: int = Field(default=20)
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
from pydantic import BaseModel, Field
|
|
2
2
|
|
|
3
|
+
from rapidata.rapidata_client.config.logging_config import LoggingConfig
|
|
4
|
+
from rapidata.rapidata_client.config.order_config import OrderConfig
|
|
5
|
+
from rapidata.rapidata_client.config.upload_config import UploadConfig
|
|
6
|
+
|
|
3
7
|
|
|
4
8
|
class RapidataConfig(BaseModel):
|
|
5
9
|
"""
|
|
@@ -7,25 +11,26 @@ class RapidataConfig(BaseModel):
|
|
|
7
11
|
|
|
8
12
|
To adjust the configurations used, you can modify the `rapidata_config` object.
|
|
9
13
|
|
|
10
|
-
|
|
11
|
-
maxUploadWorkers (int): The maximum number of worker threads for processing media paths. Defaults to 10.
|
|
12
|
-
uploadMaxRetries (int): The maximum number of retries for failed uploads. Defaults to 3.
|
|
14
|
+
Attributes:
|
|
13
15
|
enableBetaFeatures (bool): Whether to enable beta features. Defaults to False.
|
|
14
|
-
|
|
15
|
-
|
|
16
|
+
upload (UploadConfig): The configuration for the upload process.
|
|
17
|
+
Such as the maximum number of worker threads for processing media paths and the maximum number of retries for failed uploads.
|
|
18
|
+
order (OrderConfig): The configuration for the order process.
|
|
19
|
+
Such as the minimum number of datapoints required so that an automatic validationset gets created if no recommended was found.
|
|
20
|
+
logging (LoggingConfig): The configuration for the logging process.
|
|
21
|
+
Such as the logging level and the logging file.
|
|
16
22
|
|
|
17
23
|
Example:
|
|
18
24
|
```python
|
|
19
25
|
from rapidata import rapidata_config
|
|
20
|
-
rapidata_config.maxUploadWorkers = 20
|
|
26
|
+
rapidata_config.upload.maxUploadWorkers = 20
|
|
21
27
|
```
|
|
22
28
|
"""
|
|
23
29
|
|
|
24
|
-
maxUploadWorkers: int = Field(default=10)
|
|
25
|
-
uploadMaxRetries: int = Field(default=3)
|
|
26
30
|
enableBetaFeatures: bool = False
|
|
27
|
-
|
|
28
|
-
|
|
31
|
+
upload: UploadConfig = Field(default_factory=UploadConfig)
|
|
32
|
+
order: OrderConfig = Field(default_factory=OrderConfig)
|
|
33
|
+
logging: LoggingConfig = Field(default_factory=LoggingConfig)
|
|
29
34
|
|
|
30
35
|
|
|
31
36
|
rapidata_config = RapidataConfig()
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
from typing import Protocol, runtime_checkable, Any
|
|
2
|
+
from opentelemetry import trace
|
|
3
|
+
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
|
4
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
5
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
6
|
+
from opentelemetry.sdk.resources import Resource
|
|
7
|
+
from rapidata import __version__
|
|
8
|
+
from .logging_config import LoggingConfig, register_config_handler
|
|
9
|
+
from rapidata.rapidata_client.config import logger
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@runtime_checkable
|
|
13
|
+
class TracerProtocol(Protocol):
|
|
14
|
+
"""Protocol that defines the tracer interface for type checking."""
|
|
15
|
+
|
|
16
|
+
def start_span(self, name: str, *args, **kwargs) -> Any: ...
|
|
17
|
+
def start_as_current_span(self, name: str, *args, **kwargs) -> Any: ...
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class NoOpSpan:
|
|
21
|
+
"""A no-op span that does nothing when tracing is disabled."""
|
|
22
|
+
|
|
23
|
+
def __enter__(self):
|
|
24
|
+
return self
|
|
25
|
+
|
|
26
|
+
def __exit__(self, *args):
|
|
27
|
+
pass
|
|
28
|
+
|
|
29
|
+
def set_attribute(self, *args, **kwargs):
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
def set_status(self, *args, **kwargs):
|
|
33
|
+
pass
|
|
34
|
+
|
|
35
|
+
def add_event(self, *args, **kwargs):
|
|
36
|
+
pass
|
|
37
|
+
|
|
38
|
+
def end(self, *args, **kwargs):
|
|
39
|
+
pass
|
|
40
|
+
|
|
41
|
+
def __getattr__(self, name: str) -> Any:
|
|
42
|
+
"""Return self for any method call to maintain chainability."""
|
|
43
|
+
return lambda *args, **kwargs: self
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class NoOpTracer:
|
|
47
|
+
"""A no-op tracer that returns no-op spans when tracing is disabled."""
|
|
48
|
+
|
|
49
|
+
def start_span(self, name: str, *args, **kwargs) -> NoOpSpan:
|
|
50
|
+
return NoOpSpan()
|
|
51
|
+
|
|
52
|
+
def start_as_current_span(self, name: str, *args, **kwargs) -> NoOpSpan:
|
|
53
|
+
return NoOpSpan()
|
|
54
|
+
|
|
55
|
+
def __getattr__(self, name: str) -> Any:
|
|
56
|
+
"""Delegate to no-op behavior."""
|
|
57
|
+
return lambda *args, **kwargs: NoOpSpan()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class RapidataTracer:
|
|
61
|
+
"""Tracer implementation that updates when the configuration changes."""
|
|
62
|
+
|
|
63
|
+
def __init__(self, name: str = __name__):
|
|
64
|
+
self._name = name
|
|
65
|
+
self._otlp_initialized = False
|
|
66
|
+
self._tracer_provider = None
|
|
67
|
+
self._real_tracer = None
|
|
68
|
+
self._no_op_tracer = NoOpTracer()
|
|
69
|
+
self._enabled = True # Default to enabled
|
|
70
|
+
|
|
71
|
+
# Register this tracer to receive configuration updates
|
|
72
|
+
register_config_handler(self._handle_config_update)
|
|
73
|
+
|
|
74
|
+
def _handle_config_update(self, config: LoggingConfig) -> None:
|
|
75
|
+
"""Handle configuration updates."""
|
|
76
|
+
self._update_tracer(config)
|
|
77
|
+
|
|
78
|
+
def _update_tracer(self, config: LoggingConfig) -> None:
|
|
79
|
+
"""Update the tracer based on the new configuration."""
|
|
80
|
+
self._enabled = config.enable_otlp
|
|
81
|
+
|
|
82
|
+
# Initialize OTLP tracing only once and only if not disabled
|
|
83
|
+
if not self._otlp_initialized and config.enable_otlp:
|
|
84
|
+
try:
|
|
85
|
+
resource = Resource.create(
|
|
86
|
+
{
|
|
87
|
+
"service.name": "Rapidata.Python.SDK",
|
|
88
|
+
"service.version": __version__,
|
|
89
|
+
}
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
self._tracer_provider = TracerProvider(resource=resource)
|
|
93
|
+
trace.set_tracer_provider(self._tracer_provider)
|
|
94
|
+
|
|
95
|
+
exporter = OTLPSpanExporter(
|
|
96
|
+
endpoint="https://otlp-sdk.rapidata.ai/v1/traces",
|
|
97
|
+
timeout=30,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
span_processor = BatchSpanProcessor(exporter)
|
|
101
|
+
self._tracer_provider.add_span_processor(span_processor)
|
|
102
|
+
|
|
103
|
+
self._real_tracer = trace.get_tracer(self._name)
|
|
104
|
+
self._otlp_initialized = True
|
|
105
|
+
|
|
106
|
+
except Exception as e:
|
|
107
|
+
logger.warning(f"Failed to initialize tracing: {e}")
|
|
108
|
+
self._enabled = False
|
|
109
|
+
|
|
110
|
+
def start_span(self, name: str, *args, **kwargs) -> Any:
|
|
111
|
+
"""Start a span, or return a no-op span if tracing is disabled."""
|
|
112
|
+
if self._enabled and self._real_tracer:
|
|
113
|
+
return self._real_tracer.start_span(name, *args, **kwargs)
|
|
114
|
+
return self._no_op_tracer.start_span(name, *args, **kwargs)
|
|
115
|
+
|
|
116
|
+
def start_as_current_span(self, name: str, *args, **kwargs) -> Any:
|
|
117
|
+
"""Start a span as current, or return a no-op span if tracing is disabled."""
|
|
118
|
+
if self._enabled and self._real_tracer:
|
|
119
|
+
return self._real_tracer.start_as_current_span(name, *args, **kwargs)
|
|
120
|
+
return self._no_op_tracer.start_as_current_span(name, *args, **kwargs)
|
|
121
|
+
|
|
122
|
+
def __getattr__(self, name: str) -> Any:
|
|
123
|
+
"""Delegate attribute access to the appropriate tracer."""
|
|
124
|
+
if self._enabled and self._real_tracer:
|
|
125
|
+
return getattr(self._real_tracer, name)
|
|
126
|
+
return getattr(self._no_op_tracer, name)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# Create the main tracer instance - type checkers will see it as TracerProtocol
|
|
130
|
+
tracer: TracerProtocol = RapidataTracer() # type: ignore[assignment]
|