rapidata 2.36.2__py3-none-any.whl → 2.38.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +3 -4
- rapidata/rapidata_client/__init__.py +1 -4
- rapidata/rapidata_client/api/{rapidata_exception.py → rapidata_api_client.py} +119 -2
- rapidata/rapidata_client/benchmark/leaderboard/rapidata_leaderboard.py +88 -46
- rapidata/rapidata_client/benchmark/participant/_participant.py +26 -9
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py +274 -205
- rapidata/rapidata_client/benchmark/rapidata_benchmark_manager.py +98 -76
- rapidata/rapidata_client/config/__init__.py +3 -0
- rapidata/rapidata_client/config/logger.py +135 -0
- rapidata/rapidata_client/config/logging_config.py +58 -0
- rapidata/rapidata_client/config/managed_print.py +6 -0
- rapidata/rapidata_client/config/order_config.py +14 -0
- rapidata/rapidata_client/config/rapidata_config.py +14 -9
- rapidata/rapidata_client/config/tracer.py +130 -0
- rapidata/rapidata_client/config/upload_config.py +14 -0
- rapidata/rapidata_client/datapoints/_datapoint.py +1 -1
- rapidata/rapidata_client/datapoints/assets/_media_asset.py +1 -1
- rapidata/rapidata_client/datapoints/assets/_sessions.py +2 -2
- rapidata/rapidata_client/demographic/demographic_manager.py +16 -14
- rapidata/rapidata_client/filter/_base_filter.py +11 -5
- rapidata/rapidata_client/filter/age_filter.py +9 -3
- rapidata/rapidata_client/filter/and_filter.py +20 -5
- rapidata/rapidata_client/filter/campaign_filter.py +7 -1
- rapidata/rapidata_client/filter/country_filter.py +8 -2
- rapidata/rapidata_client/filter/custom_filter.py +9 -3
- rapidata/rapidata_client/filter/gender_filter.py +9 -3
- rapidata/rapidata_client/filter/language_filter.py +12 -5
- rapidata/rapidata_client/filter/new_user_filter.py +3 -4
- rapidata/rapidata_client/filter/not_filter.py +17 -5
- rapidata/rapidata_client/filter/or_filter.py +20 -5
- rapidata/rapidata_client/filter/response_count_filter.py +6 -0
- rapidata/rapidata_client/filter/user_score_filter.py +17 -5
- rapidata/rapidata_client/order/_rapidata_dataset.py +45 -17
- rapidata/rapidata_client/order/_rapidata_order_builder.py +19 -13
- rapidata/rapidata_client/order/rapidata_order.py +60 -48
- rapidata/rapidata_client/order/rapidata_order_manager.py +239 -195
- rapidata/rapidata_client/order/rapidata_results.py +71 -57
- rapidata/rapidata_client/rapidata_client.py +36 -23
- rapidata/rapidata_client/selection/_base_selection.py +6 -0
- rapidata/rapidata_client/selection/static_selection.py +5 -10
- rapidata/rapidata_client/settings/_rapidata_setting.py +8 -0
- rapidata/rapidata_client/settings/alert_on_fast_response.py +8 -5
- rapidata/rapidata_client/settings/free_text_minimum_characters.py +9 -4
- rapidata/rapidata_client/validation/rapidata_validation_set.py +20 -16
- rapidata/rapidata_client/validation/rapids/rapids.py +7 -1
- rapidata/rapidata_client/validation/validation_set_manager.py +285 -268
- rapidata/rapidata_client/workflow/_base_workflow.py +6 -1
- rapidata/rapidata_client/workflow/_classify_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_compare_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_draw_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_evaluation_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_free_text_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_locate_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_ranking_workflow.py +12 -0
- rapidata/rapidata_client/workflow/_select_words_workflow.py +6 -0
- rapidata/rapidata_client/workflow/_timestamp_workflow.py +6 -0
- rapidata/service/credential_manager.py +1 -1
- rapidata/service/openapi_service.py +2 -2
- {rapidata-2.36.2.dist-info → rapidata-2.38.0.dist-info}/METADATA +4 -1
- {rapidata-2.36.2.dist-info → rapidata-2.38.0.dist-info}/RECORD +62 -59
- rapidata/rapidata_client/logging/__init__.py +0 -2
- rapidata/rapidata_client/logging/logger.py +0 -122
- rapidata/rapidata_client/logging/output_manager.py +0 -20
- {rapidata-2.36.2.dist-info → rapidata-2.38.0.dist-info}/LICENSE +0 -0
- {rapidata-2.36.2.dist-info → rapidata-2.38.0.dist-info}/WHEEL +0 -0
|
@@ -9,6 +9,7 @@ from rapidata.api_client.models.filter import Filter
|
|
|
9
9
|
from rapidata.api_client.models.sort_criterion import SortCriterion
|
|
10
10
|
from rapidata.api_client.models.sort_direction import SortDirection
|
|
11
11
|
from rapidata.api_client.models.filter_operator import FilterOperator
|
|
12
|
+
from rapidata.rapidata_client.config import logger, tracer
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class RapidataBenchmarkManager:
|
|
@@ -58,78 +59,96 @@ class RapidataBenchmarkManager:
|
|
|
58
59
|
benchmark = create_new_benchmark(name=name, identifiers=identifiers, prompts=prompts, prompt_assets=prompt_assets, tags=tags)
|
|
59
60
|
```
|
|
60
61
|
"""
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
if prompts and (
|
|
65
|
-
not isinstance(prompts, list)
|
|
66
|
-
or not all(isinstance(prompt, str) or prompt is None for prompt in prompts)
|
|
62
|
+
with tracer.start_as_current_span(
|
|
63
|
+
"RapidataBenchmarkManager.create_new_benchmark"
|
|
67
64
|
):
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
65
|
+
if not isinstance(name, str):
|
|
66
|
+
raise ValueError("Name must be a string.")
|
|
67
|
+
|
|
68
|
+
if prompts and (
|
|
69
|
+
not isinstance(prompts, list)
|
|
70
|
+
or not all(
|
|
71
|
+
isinstance(prompt, str) or prompt is None for prompt in prompts
|
|
72
|
+
)
|
|
73
|
+
):
|
|
74
|
+
raise ValueError("Prompts must be a list of strings or None.")
|
|
75
|
+
|
|
76
|
+
if prompt_assets and (
|
|
77
|
+
not isinstance(prompt_assets, list)
|
|
78
|
+
or not all(
|
|
79
|
+
isinstance(asset, str) or asset is None for asset in prompt_assets
|
|
80
|
+
)
|
|
81
|
+
):
|
|
82
|
+
raise ValueError("Media assets must be a list of strings or None.")
|
|
83
|
+
|
|
84
|
+
if not isinstance(identifiers, list) or not all(
|
|
85
|
+
isinstance(identifier, str) for identifier in identifiers
|
|
86
|
+
):
|
|
87
|
+
raise ValueError("Identifiers must be a list of strings.")
|
|
88
|
+
|
|
89
|
+
if prompts and len(identifiers) != len(prompts):
|
|
90
|
+
raise ValueError("Identifiers and prompts must have the same length.")
|
|
91
|
+
|
|
92
|
+
if prompt_assets and len(identifiers) != len(prompt_assets):
|
|
93
|
+
raise ValueError(
|
|
94
|
+
"Identifiers and media assets must have the same length."
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
if not prompts and not prompt_assets:
|
|
98
|
+
raise ValueError(
|
|
99
|
+
"At least one of prompts or media assets must be provided."
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
if len(set(identifiers)) != len(identifiers):
|
|
103
|
+
raise ValueError("Identifiers must be unique.")
|
|
104
|
+
|
|
105
|
+
if tags and len(identifiers) != len(tags):
|
|
106
|
+
raise ValueError("Identifiers and tags must have the same length.")
|
|
107
|
+
|
|
108
|
+
logger.info("Creating new benchmark %s", name)
|
|
109
|
+
|
|
110
|
+
benchmark_result = self.__openapi_service.benchmark_api.benchmark_post(
|
|
111
|
+
create_benchmark_model=CreateBenchmarkModel(
|
|
112
|
+
name=name,
|
|
113
|
+
)
|
|
74
114
|
)
|
|
75
|
-
):
|
|
76
|
-
raise ValueError("Media assets must be a list of strings or None.")
|
|
77
115
|
|
|
78
|
-
|
|
79
|
-
isinstance(identifier, str) for identifier in identifiers
|
|
80
|
-
):
|
|
81
|
-
raise ValueError("Identifiers must be a list of strings.")
|
|
116
|
+
logger.info("Benchmark created with id %s", benchmark_result.id)
|
|
82
117
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if prompt_assets and len(identifiers) != len(prompt_assets):
|
|
87
|
-
raise ValueError("Identifiers and media assets must have the same length.")
|
|
88
|
-
|
|
89
|
-
if not prompts and not prompt_assets:
|
|
90
|
-
raise ValueError(
|
|
91
|
-
"At least one of prompts or media assets must be provided."
|
|
118
|
+
benchmark = RapidataBenchmark(
|
|
119
|
+
name, benchmark_result.id, self.__openapi_service
|
|
92
120
|
)
|
|
93
121
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
benchmark_result = self.__openapi_service.benchmark_api.benchmark_post(
|
|
101
|
-
create_benchmark_model=CreateBenchmarkModel(
|
|
102
|
-
name=name,
|
|
122
|
+
prompts_list = prompts if prompts is not None else [None] * len(identifiers)
|
|
123
|
+
media_assets_list = (
|
|
124
|
+
prompt_assets
|
|
125
|
+
if prompt_assets is not None
|
|
126
|
+
else [None] * len(identifiers)
|
|
103
127
|
)
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
benchmark = RapidataBenchmark(name, benchmark_result.id, self.__openapi_service)
|
|
107
|
-
|
|
108
|
-
prompts_list = prompts if prompts is not None else [None] * len(identifiers)
|
|
109
|
-
media_assets_list = (
|
|
110
|
-
prompt_assets if prompt_assets is not None else [None] * len(identifiers)
|
|
111
|
-
)
|
|
112
|
-
tags_list = tags if tags is not None else [None] * len(identifiers)
|
|
128
|
+
tags_list = tags if tags is not None else [None] * len(identifiers)
|
|
113
129
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
130
|
+
for identifier, prompt, asset, tag in zip(
|
|
131
|
+
identifiers, prompts_list, media_assets_list, tags_list
|
|
132
|
+
):
|
|
133
|
+
benchmark.add_prompt(identifier, prompt, asset, tag)
|
|
118
134
|
|
|
119
|
-
|
|
135
|
+
return benchmark
|
|
120
136
|
|
|
121
137
|
def get_benchmark_by_id(self, id: str) -> RapidataBenchmark:
|
|
122
138
|
"""
|
|
123
139
|
Returns a benchmark by its ID.
|
|
124
140
|
"""
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
141
|
+
with tracer.start_as_current_span(
|
|
142
|
+
"RapidataBenchmarkManager.get_benchmark_by_id"
|
|
143
|
+
):
|
|
144
|
+
benchmark_result = (
|
|
145
|
+
self.__openapi_service.benchmark_api.benchmark_benchmark_id_get(
|
|
146
|
+
benchmark_id=id
|
|
147
|
+
)
|
|
148
|
+
)
|
|
149
|
+
return RapidataBenchmark(
|
|
150
|
+
benchmark_result.name, benchmark_result.id, self.__openapi_service
|
|
128
151
|
)
|
|
129
|
-
)
|
|
130
|
-
return RapidataBenchmark(
|
|
131
|
-
benchmark_result.name, benchmark_result.id, self.__openapi_service
|
|
132
|
-
)
|
|
133
152
|
|
|
134
153
|
def find_benchmarks(
|
|
135
154
|
self, name: str = "", amount: int = 10
|
|
@@ -137,24 +156,27 @@ class RapidataBenchmarkManager:
|
|
|
137
156
|
"""
|
|
138
157
|
Returns a list of benchmarks by their name.
|
|
139
158
|
"""
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
159
|
+
with tracer.start_as_current_span("RapidataBenchmarkManager.find_benchmarks"):
|
|
160
|
+
benchmark_result = self.__openapi_service.benchmark_api.benchmarks_get(
|
|
161
|
+
QueryModel(
|
|
162
|
+
page=PageInfo(index=1, size=amount),
|
|
163
|
+
filter=RootFilter(
|
|
164
|
+
filters=[
|
|
165
|
+
Filter(
|
|
166
|
+
field="Name",
|
|
167
|
+
operator=FilterOperator.CONTAINS,
|
|
168
|
+
value=name,
|
|
169
|
+
)
|
|
170
|
+
]
|
|
171
|
+
),
|
|
172
|
+
sortCriteria=[
|
|
173
|
+
SortCriterion(
|
|
174
|
+
direction=SortDirection.DESC, propertyName="CreatedAt"
|
|
147
175
|
)
|
|
148
|
-
]
|
|
149
|
-
)
|
|
150
|
-
sortCriteria=[
|
|
151
|
-
SortCriterion(
|
|
152
|
-
direction=SortDirection.DESC, propertyName="CreatedAt"
|
|
153
|
-
)
|
|
154
|
-
],
|
|
176
|
+
],
|
|
177
|
+
)
|
|
155
178
|
)
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
]
|
|
179
|
+
return [
|
|
180
|
+
RapidataBenchmark(benchmark.name, benchmark.id, self.__openapi_service)
|
|
181
|
+
for benchmark in benchmark_result.items
|
|
182
|
+
]
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Protocol, runtime_checkable
|
|
3
|
+
from opentelemetry._logs import set_logger_provider
|
|
4
|
+
from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
|
|
5
|
+
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
|
|
6
|
+
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
|
|
7
|
+
from opentelemetry.sdk.resources import Resource
|
|
8
|
+
from rapidata import __version__
|
|
9
|
+
from .logging_config import LoggingConfig, register_config_handler
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@runtime_checkable
|
|
13
|
+
class LoggerProtocol(Protocol):
|
|
14
|
+
"""Protocol that defines the logger interface for type checking."""
|
|
15
|
+
|
|
16
|
+
def debug(self, msg: object, *args, **kwargs) -> None: ...
|
|
17
|
+
def info(self, msg: object, *args, **kwargs) -> None: ...
|
|
18
|
+
def warning(self, msg: object, *args, **kwargs) -> None: ...
|
|
19
|
+
def warn(self, msg: object, *args, **kwargs) -> None: ...
|
|
20
|
+
def error(self, msg: object, *args, **kwargs) -> None: ...
|
|
21
|
+
def exception(self, msg: object, *args, exc_info=True, **kwargs) -> None: ...
|
|
22
|
+
def critical(self, msg: object, *args, **kwargs) -> None: ...
|
|
23
|
+
def fatal(self, msg: object, *args, **kwargs) -> None: ...
|
|
24
|
+
def log(self, level: int, msg: object, *args, **kwargs) -> None: ...
|
|
25
|
+
def isEnabledFor(self, level: int) -> bool: ...
|
|
26
|
+
def getEffectiveLevel(self) -> int: ...
|
|
27
|
+
def setLevel(self, level: int | str) -> None: ...
|
|
28
|
+
def addHandler(self, handler: logging.Handler) -> None: ...
|
|
29
|
+
def removeHandler(self, handler: logging.Handler) -> None: ...
|
|
30
|
+
@property
|
|
31
|
+
def handlers(self) -> list[logging.Handler]: ...
|
|
32
|
+
@property
|
|
33
|
+
def level(self) -> int: ...
|
|
34
|
+
@property
|
|
35
|
+
def name(self) -> str: ...
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class RapidataLogger:
    """Logger wrapper that reconfigures itself when the logging config changes.

    The instance wraps ``logging.getLogger(name)`` and registers
    ``_handle_config_update`` as a config handler. On every update the
    console handler (and the file handler, if ``log_file`` is set) is
    rebuilt from the supplied configuration. An OTLP handler is created at
    most once and attached only while ``enable_otlp`` is true.

    Any attribute not defined here is delegated to the wrapped logger.
    """

    def __init__(self, name: str = "rapidata"):
        self._logger = logging.getLogger(name)
        self._otlp_initialized = False
        self._otlp_handler = None
        # Console/file handlers created by this wrapper. They are tracked so
        # they can be closed when a later config update replaces them.
        self._owned_handlers = []

        # Register this logger to receive configuration updates
        register_config_handler(self._handle_config_update)

    def _handle_config_update(self, config: "LoggingConfig") -> None:
        """Handle configuration updates."""
        self._update_logger(config)

    def _update_logger(self, config: "LoggingConfig") -> None:
        """Rebuild the wrapped logger's handlers from ``config``.

        Args:
            config: Object providing ``level``, ``format``, ``log_file`` and
                ``enable_otlp``.

        Raises:
            AttributeError: If ``config.level`` does not name an attribute of
                the ``logging`` module. This is raised before any existing
                handler is touched.
        """
        # Initialize OTLP logging only once and only if not disabled
        if not self._otlp_initialized and config.enable_otlp:
            try:
                logger_provider = LoggerProvider(
                    resource=Resource.create(
                        {
                            "service.name": "Rapidata.Python.SDK",
                            "service.version": __version__,
                        }
                    ),
                )
                set_logger_provider(logger_provider)

                exporter = OTLPLogExporter(
                    endpoint="https://otlp-sdk.rapidata.ai/v1/logs",
                    timeout=30,
                )

                processor = BatchLogRecordProcessor(
                    exporter,
                    max_queue_size=2048,
                    export_timeout_millis=30000,
                    max_export_batch_size=512,
                )

                logger_provider.add_log_record_processor(processor)

                # OTLP handler - captures DEBUG and above
                self._otlp_handler = LoggingHandler(logger_provider=logger_provider)
                self._otlp_handler.setLevel(logging.DEBUG)  # OTLP gets everything

                self._otlp_initialized = True

            except Exception as e:
                # Best effort: telemetry must never prevent local logging.
                self._logger.warning(f"Failed to initialize OTLP logging: {e}")
                import traceback

                traceback.print_exc()

        # Build the new console handler first, so an invalid level fails
        # before the existing handlers are removed.
        console_handler = logging.StreamHandler()
        console_level = getattr(logging, config.level.upper())
        console_handler.setLevel(console_level)
        console_handler.setFormatter(logging.Formatter(config.format))

        # The logger itself must let DEBUG through so the OTLP handler sees
        # everything; the console/file handlers filter by their own level.
        self._logger.setLevel(logging.DEBUG)

        # Remove every existing handler, keeping only the OTLP handler while
        # OTLP is enabled.
        for handler in self._logger.handlers[:]:
            if handler != self._otlp_handler or not config.enable_otlp:
                self._logger.removeHandler(handler)

        # Close the handlers this wrapper created on a previous update.
        # Without this every config change leaks the FileHandler's open file.
        # Handlers attached by other code are removed above but not closed.
        for stale in self._owned_handlers:
            stale.close()
        self._owned_handlers = []

        # Add OTLP handler if initialized and not disabled
        if (
            self._otlp_handler
            and self._otlp_handler not in self._logger.handlers
            and config.enable_otlp
        ):
            self._logger.addHandler(self._otlp_handler)

        # Add console handler
        self._logger.addHandler(console_handler)
        self._owned_handlers.append(console_handler)

        # Add file handler if log_file is provided
        if config.log_file:
            file_handler = logging.FileHandler(config.log_file)
            file_handler.setLevel(console_level)  # Use same level as console
            file_handler.setFormatter(logging.Formatter(config.format))
            self._logger.addHandler(file_handler)
            self._owned_handlers.append(file_handler)

    def __getattr__(self, name: str) -> object:
        """Delegate attribute access to the underlying logger."""
        # __getattr__ only runs when normal lookup fails. If ``_logger``
        # itself is missing (instance created without __init__), delegating
        # would re-enter this method forever.
        if name == "_logger":
            raise AttributeError(name)
        return getattr(self._logger, name)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
logger: LoggerProtocol = RapidataLogger() # type: ignore[assignment]
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from typing import Callable
|
|
2
|
+
from pydantic import BaseModel, Field
|
|
3
|
+
|
|
4
|
+
# Type alias for config update handlers
|
|
5
|
+
ConfigUpdateHandler = Callable[["LoggingConfig"], None]
|
|
6
|
+
|
|
7
|
+
# Global list to store registered handlers
|
|
8
|
+
_config_handlers: list[ConfigUpdateHandler] = []
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def register_config_handler(handler: ConfigUpdateHandler) -> None:
    """Add ``handler`` to the module-level list of logging-config listeners.

    The callable is appended unconditionally, so registering the same
    handler twice stores it twice.
    """
    _config_handlers.append(handler)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def unregister_config_handler(handler: ConfigUpdateHandler) -> None:
    """Remove the first registration of ``handler``, if there is one.

    Unregistering a handler that was never registered is a silent no-op.
    """
    try:
        _config_handlers.remove(handler)
    except ValueError:
        # Not registered: nothing to remove.
        pass
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class LoggingConfig(BaseModel):
    """
    Holds the configuration for the logging process.

    Every registered config handler is called with this instance once after
    construction and again after each attribute assignment.

    Args:
        level (str): The logging level. Defaults to "WARNING".
        log_file (str | None): The logging file. Defaults to None.
        format (str): The logging format. Defaults to "%(asctime)s - %(name)s - %(levelname)s - %(message)s".
        silent_mode (bool): Whether to disable the prints and progress bars. Does NOT affect the logging. Defaults to False.
        enable_otlp (bool): Whether to enable OpenTelemetry trace logs. Defaults to True.
    """

    level: str = Field(default="WARNING")
    log_file: str | None = Field(default=None)
    format: str = Field(default="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    silent_mode: bool = Field(default=False)
    enable_otlp: bool = Field(default=True)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Push the initial values to every registered handler.
        self._notify_handlers()

    def __setattr__(self, name: str, value) -> None:
        super().__setattr__(name, value)
        # Each assignment is broadcast so listeners can reconfigure.
        self._notify_handlers()

    def _notify_handlers(self) -> None:
        """Notify all registered handlers that the configuration has updated."""
        # Iterate over a snapshot: a handler may register or unregister
        # handlers while running, and mutating the live list mid-iteration
        # would make other handlers get skipped.
        for handler in list(_config_handlers):
            try:
                handler(self)
            except Exception as e:
                # Report the error but don't let one handler failure break others
                print(f"Warning: Config handler failed: {e}")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Tracer is now handled in tracer.py with event-based updates
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from pydantic import BaseModel, Field
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class OrderConfig(BaseModel):
    """
    Settings that control how orders are created.

    Args:
        minOrderDatapointsForValidation (int): Minimum number of datapoints an order needs
            before a validation set is created automatically when no recommended one was found.
            Defaults to 50.
        autoValidationSetSize (int): Upper bound on the size of the auto-generated validation set.
            Defaults to 20.
    """

    minOrderDatapointsForValidation: int = 50
    autoValidationSetSize: int = 20
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
from pydantic import BaseModel, Field
|
|
2
2
|
|
|
3
|
+
from rapidata.rapidata_client.config.logging_config import LoggingConfig
|
|
4
|
+
from rapidata.rapidata_client.config.order_config import OrderConfig
|
|
5
|
+
from rapidata.rapidata_client.config.upload_config import UploadConfig
|
|
6
|
+
|
|
3
7
|
|
|
4
8
|
class RapidataConfig(BaseModel):
|
|
5
9
|
"""
|
|
@@ -8,24 +12,25 @@ class RapidataConfig(BaseModel):
|
|
|
8
12
|
To adjust the configurations used, you can modify the `rapidata_config` object.
|
|
9
13
|
|
|
10
14
|
Args:
|
|
11
|
-
maxUploadWorkers (int): The maximum number of worker threads for processing media paths. Defaults to 10.
|
|
12
|
-
uploadMaxRetries (int): The maximum number of retries for failed uploads. Defaults to 3.
|
|
13
15
|
enableBetaFeatures (bool): Whether to enable beta features. Defaults to False.
|
|
14
|
-
|
|
15
|
-
|
|
16
|
+
upload (UploadConfig): The configuration for the upload process.
|
|
17
|
+
Such as the maximum number of worker threads for processing media paths and the maximum number of retries for failed uploads.
|
|
18
|
+
order (OrderConfig): The configuration for the order process.
|
|
19
|
+
Such as the minimum number of datapoints required so that an automatic validationset gets created if no recommended was found.
|
|
20
|
+
logging (LoggingConfig): The configuration for the logging process.
|
|
21
|
+
Such as the logging level and the logging file.
|
|
16
22
|
|
|
17
23
|
Example:
|
|
18
24
|
```python
|
|
19
25
|
from rapidata import rapidata_config
|
|
20
|
-
rapidata_config.maxUploadWorkers = 20
|
|
26
|
+
rapidata_config.upload.maxWorkers = 20
|
|
21
27
|
```
|
|
22
28
|
"""
|
|
23
29
|
|
|
24
|
-
maxUploadWorkers: int = Field(default=10)
|
|
25
|
-
uploadMaxRetries: int = Field(default=3)
|
|
26
30
|
enableBetaFeatures: bool = False
|
|
27
|
-
|
|
28
|
-
|
|
31
|
+
upload: UploadConfig = Field(default_factory=UploadConfig)
|
|
32
|
+
order: OrderConfig = Field(default_factory=OrderConfig)
|
|
33
|
+
logging: LoggingConfig = Field(default_factory=LoggingConfig)
|
|
29
34
|
|
|
30
35
|
|
|
31
36
|
rapidata_config = RapidataConfig()
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
from typing import Protocol, runtime_checkable, Any
|
|
2
|
+
from opentelemetry import trace
|
|
3
|
+
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
|
4
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
5
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
6
|
+
from opentelemetry.sdk.resources import Resource
|
|
7
|
+
from rapidata import __version__
|
|
8
|
+
from .logging_config import LoggingConfig, register_config_handler
|
|
9
|
+
from rapidata.rapidata_client.config import logger
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@runtime_checkable
class TracerProtocol(Protocol):
    """Structural description of the two span-creating calls this module exposes.

    Used to annotate the module-level ``tracer`` object for type checkers.
    The method bodies are placeholders only.
    """

    def start_span(self, name: str, *args, **kwargs) -> Any:
        """Placeholder for creating a span called ``name``."""

    def start_as_current_span(self, name: str, *args, **kwargs) -> Any:
        """Placeholder for creating a span called ``name`` as the current span."""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class NoOpSpan:
    """A no-op span that does nothing when tracing is disabled.

    It works as a context manager and accepts the usual span calls without
    recording anything. Any other (non-dunder) method call is answered by
    returning the span itself, so call chains keep working.
    """

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Returns None, so exceptions raised inside the ``with`` body propagate.
        pass

    def set_attribute(self, *args, **kwargs):
        pass

    def set_status(self, *args, **kwargs):
        pass

    def add_event(self, *args, **kwargs):
        pass

    def end(self, *args, **kwargs):
        pass

    def is_recording(self) -> bool:
        """Return False: this span never records.

        Without this explicit method the catch-all ``__getattr__`` answered
        ``is_recording()`` with the span itself, which is truthy and misleads
        callers that guard work behind ``if span.is_recording():``.
        """
        return False

    def __getattr__(self, name: str) -> Any:
        """Return a callable yielding ``self`` for any unknown method name."""
        # Dunder probes (e.g. ``__wrapped__``, ``__deepcopy__``) come from
        # introspection code that expects AttributeError when a hook is
        # absent; fabricating a callable for them would advertise hooks this
        # class does not implement.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return lambda *args, **kwargs: self
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class NoOpTracer:
    """Stand-in tracer used while tracing is disabled.

    Every span-creating call ignores its arguments and yields a fresh
    ``NoOpSpan``.
    """

    def start_span(self, name: str, *args, **kwargs) -> NoOpSpan:
        """Return a new inert span; ``name`` and all other arguments are unused."""
        inert_span = NoOpSpan()
        return inert_span

    def start_as_current_span(self, name: str, *args, **kwargs) -> NoOpSpan:
        """Return a new inert span; nothing is made current."""
        inert_span = NoOpSpan()
        return inert_span

    def __getattr__(self, name: str) -> Any:
        """Answer any other attribute with a callable that produces a ``NoOpSpan``."""
        return lambda *args, **kwargs: NoOpSpan()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class RapidataTracer:
    """Tracer wrapper that follows the ``enable_otlp`` configuration flag.

    The instance registers ``_handle_config_update`` as a config handler.
    The first update with ``enable_otlp`` true builds a ``TracerProvider``
    with an OTLP span exporter. While tracing is disabled, or before it is
    initialised, every call is served by a ``NoOpTracer``.
    """

    def __init__(self, name: str = __name__):
        self._name = name
        self._otlp_initialized = False
        self._tracer_provider = None
        self._real_tracer = None
        self._no_op_tracer = NoOpTracer()
        self._enabled = True  # Default to enabled

        # Register this tracer to receive configuration updates
        register_config_handler(self._handle_config_update)

    def _handle_config_update(self, config: "LoggingConfig") -> None:
        """Handle configuration updates."""
        self._update_tracer(config)

    def _update_tracer(self, config: "LoggingConfig") -> None:
        """Apply ``config.enable_otlp`` and lazily initialise OTLP tracing.

        Initialisation is attempted only while it has not yet succeeded and
        OTLP is enabled. On failure a warning is logged and tracing is
        switched off until the next config update.
        """
        self._enabled = config.enable_otlp

        # Initialize OTLP tracing only once and only if not disabled
        if not self._otlp_initialized and config.enable_otlp:
            try:
                resource = Resource.create(
                    {
                        "service.name": "Rapidata.Python.SDK",
                        "service.version": __version__,
                    }
                )

                self._tracer_provider = TracerProvider(resource=resource)
                trace.set_tracer_provider(self._tracer_provider)

                exporter = OTLPSpanExporter(
                    endpoint="https://otlp-sdk.rapidata.ai/v1/traces",
                    timeout=30,
                )

                span_processor = BatchSpanProcessor(exporter)
                self._tracer_provider.add_span_processor(span_processor)

                # Take the tracer from the provider configured above, not from
                # the global one. If a global provider was already installed
                # (by the host application or an earlier failed attempt),
                # set_tracer_provider() does not replace it, and a globally
                # fetched tracer would bypass the exporter attached here.
                self._real_tracer = self._tracer_provider.get_tracer(self._name)
                self._otlp_initialized = True

            except Exception as e:
                # Best effort: tracing problems must not break the SDK.
                logger.warning(f"Failed to initialize tracing: {e}")
                self._enabled = False

    def start_span(self, name: str, *args, **kwargs) -> Any:
        """Start a span, or return a no-op span if tracing is disabled."""
        if self._enabled and self._real_tracer:
            return self._real_tracer.start_span(name, *args, **kwargs)
        return self._no_op_tracer.start_span(name, *args, **kwargs)

    def start_as_current_span(self, name: str, *args, **kwargs) -> Any:
        """Start a span as current, or return a no-op span if tracing is disabled."""
        if self._enabled and self._real_tracer:
            return self._real_tracer.start_as_current_span(name, *args, **kwargs)
        return self._no_op_tracer.start_as_current_span(name, *args, **kwargs)

    def __getattr__(self, name: str) -> Any:
        """Delegate attribute access to the appropriate tracer."""
        # __getattr__ only runs when normal lookup fails. If one of the
        # attributes read below is itself missing (instance created without
        # __init__), delegating would re-enter this method forever.
        if name in ("_enabled", "_real_tracer", "_no_op_tracer"):
            raise AttributeError(name)
        if self._enabled and self._real_tracer:
            return getattr(self._real_tracer, name)
        return getattr(self._no_op_tracer, name)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# Create the main tracer instance - type checkers will see it as TracerProtocol
|
|
130
|
+
tracer: TracerProtocol = RapidataTracer() # type: ignore[assignment]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from pydantic import BaseModel, Field
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class UploadConfig(BaseModel):
    """
    Settings that control how media is uploaded.

    Args:
        maxWorkers (int): Upper bound on the number of worker threads used for processing media paths.
            Defaults to 10.
        maxRetries (int): Upper bound on the number of retries for a failed upload.
            Defaults to 3.
    """

    maxWorkers: int = 10
    maxRetries: int = 3
|
|
@@ -24,7 +24,7 @@ from rapidata.rapidata_client.datapoints.metadata._media_asset_metadata import (
|
|
|
24
24
|
MediaAssetMetadata,
|
|
25
25
|
)
|
|
26
26
|
from rapidata.rapidata_client.datapoints.metadata._prompt_metadata import PromptMetadata
|
|
27
|
-
from rapidata.rapidata_client.
|
|
27
|
+
from rapidata.rapidata_client.config import logger
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
class Datapoint:
|
|
@@ -17,7 +17,7 @@ from pydantic import StrictStr, StrictBytes
|
|
|
17
17
|
import logging
|
|
18
18
|
from functools import cached_property
|
|
19
19
|
from rapidata.rapidata_client.datapoints.assets._sessions import SessionManager
|
|
20
|
-
from rapidata.rapidata_client.
|
|
20
|
+
from rapidata.rapidata_client.config import logger
|
|
21
21
|
from rapidata.rapidata_client.datapoints.assets.constants import (
|
|
22
22
|
ALLOWED_IMAGE_EXTENSIONS,
|
|
23
23
|
ALLOWED_MEDIA_EXTENSIONS,
|
|
@@ -18,8 +18,8 @@ class SessionManager:
|
|
|
18
18
|
requests.Session: A singleton requests session with retry logic.
|
|
19
19
|
"""
|
|
20
20
|
if cls._session is None:
|
|
21
|
-
max_retries: int = rapidata_config.
|
|
22
|
-
max_workers: int = rapidata_config.
|
|
21
|
+
max_retries: int = rapidata_config.upload.maxRetries
|
|
22
|
+
max_workers: int = rapidata_config.upload.maxWorkers
|
|
23
23
|
cls._session = requests.Session()
|
|
24
24
|
retries = Retry(
|
|
25
25
|
total=max_retries,
|