datarobot_moderations-11.1.12-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datarobot_dome/__init__.py +11 -0
- datarobot_dome/async_http_client.py +248 -0
- datarobot_dome/chat_helper.py +227 -0
- datarobot_dome/constants.py +318 -0
- datarobot_dome/drum_integration.py +977 -0
- datarobot_dome/guard.py +736 -0
- datarobot_dome/guard_executor.py +755 -0
- datarobot_dome/guard_helpers.py +457 -0
- datarobot_dome/guards/__init__.py +11 -0
- datarobot_dome/guards/guard_llm_mixin.py +232 -0
- datarobot_dome/llm.py +148 -0
- datarobot_dome/metrics/__init__.py +11 -0
- datarobot_dome/metrics/citation_metrics.py +98 -0
- datarobot_dome/metrics/factory.py +52 -0
- datarobot_dome/metrics/metric_scorer.py +78 -0
- datarobot_dome/pipeline/__init__.py +11 -0
- datarobot_dome/pipeline/llm_pipeline.py +474 -0
- datarobot_dome/pipeline/pipeline.py +376 -0
- datarobot_dome/pipeline/vdb_pipeline.py +127 -0
- datarobot_dome/streaming.py +395 -0
- datarobot_moderations-11.1.12.dist-info/METADATA +113 -0
- datarobot_moderations-11.1.12.dist-info/RECORD +23 -0
- datarobot_moderations-11.1.12.dist-info/WHEEL +4 -0
datarobot_dome/pipeline/llm_pipeline.py
@@ -0,0 +1,474 @@
+# ---------------------------------------------------------------------------------
+# Copyright (c) 2025 DataRobot, Inc. and its affiliates. All rights reserved.
+# Last updated 2025.
+#
+# DataRobot, Inc. Confidential.
+# This is proprietary source code of DataRobot, Inc. and its affiliates.
+#
+# This file and its contents are subject to DataRobot Tool and Utility Agreement.
+# For details, see
+# https://www.datarobot.com/wp-content/uploads/2021/07/DataRobot-Tool-and-Utility-Agreement.pdf.
+# ---------------------------------------------------------------------------------
+import logging
+import math
+import os
+from datetime import datetime
+from datetime import timezone
+
+import numpy as np
+import yaml
+from datarobot.enums import CustomMetricAggregationType
+from datarobot.enums import CustomMetricDirectionality
+
+from datarobot_dome.async_http_client import AsyncHTTPClient
+from datarobot_dome.constants import CUSTOM_METRIC_DESCRIPTION_SUFFIX
+from datarobot_dome.constants import DEFAULT_PROMPT_COLUMN_NAME
+from datarobot_dome.constants import LOGGER_NAME_PREFIX
+from datarobot_dome.constants import GuardAction
+from datarobot_dome.constants import GuardOperatorType
+from datarobot_dome.constants import GuardStage
+from datarobot_dome.guard import GuardFactory
+from datarobot_dome.guard import moderation_config_trafaret
+from datarobot_dome.guard_helpers import get_rouge_1_scorer
+from datarobot_dome.pipeline.pipeline import Pipeline
+
+CUSTOM_METRICS_BULK_UPLOAD_API_PREFIX = "deployments"
+CUSTOM_METRICS_BULK_UPLOAD_API_SUFFIX = "customMetrics/bulkUpload/"
+
+
+def get_stage_str(stage):
+    return "Prompts" if stage == GuardStage.PROMPT else "Responses"
+
+
+def get_blocked_custom_metric(stage):
+    return {
+        "name": f"Blocked {get_stage_str(stage)}",
+        "directionality": CustomMetricDirectionality.LOWER_IS_BETTER,
+        "units": "count",
+        "type": CustomMetricAggregationType.SUM,
+        "baselineValue": 0,
+        "isModelSpecific": True,
+        "timeStep": "hour",
+        "description": (
+            f"Number of blocked {get_stage_str(stage)}. {CUSTOM_METRIC_DESCRIPTION_SUFFIX}"
+        ),
+    }
+
+
+def get_total_custom_metric(stage):
+    return {
+        "name": f"Total {get_stage_str(stage)}",
+        "directionality": CustomMetricDirectionality.HIGHER_IS_BETTER,
+        "units": "count",
+        "type": CustomMetricAggregationType.SUM,
+        "baselineValue": 0,
+        "isModelSpecific": True,
+        "timeStep": "hour",
+        "description": (
+            f"Total Number of {get_stage_str(stage)}. {CUSTOM_METRIC_DESCRIPTION_SUFFIX}"
+        ),
+    }
+
+
+prescore_guard_latency_custom_metric = {
+    "name": "Prescore Guard Latency",
+    "directionality": CustomMetricDirectionality.LOWER_IS_BETTER,
+    "units": "seconds",
+    "type": CustomMetricAggregationType.AVERAGE,
+    "baselineValue": 0,
+    "isModelSpecific": True,
+    "timeStep": "hour",
+    "description": f"Latency to execute prescore guards. {CUSTOM_METRIC_DESCRIPTION_SUFFIX}",
+}
+
+postscore_guard_latency_custom_metric = {
+    "name": "Postscore Guard Latency",
+    "directionality": CustomMetricDirectionality.LOWER_IS_BETTER,
+    "units": "seconds",
+    "type": CustomMetricAggregationType.AVERAGE,
+    "baselineValue": 0,
+    "isModelSpecific": True,
+    "timeStep": "hour",
+    "description": f"Latency to execute postscore guards. {CUSTOM_METRIC_DESCRIPTION_SUFFIX}",
+}
+
+score_latency = {
+    "name": "LLM Score Latency",
+    "directionality": CustomMetricDirectionality.LOWER_IS_BETTER,
+    "units": "seconds",
+    "type": CustomMetricAggregationType.AVERAGE,
+    "baselineValue": 0,
+    "isModelSpecific": True,
+    "timeStep": "hour",
+    "description": f"Latency of actual LLM Score. {CUSTOM_METRIC_DESCRIPTION_SUFFIX}",
+}
+
+
+class LLMPipeline(Pipeline):
+    common_message = "Custom Metrics and deployment settings will not be available"
+
+    def __init__(self, guards_config_filename):
+        self._logger = logging.getLogger(LOGGER_NAME_PREFIX + "." + self.__class__.__name__)
+        self._pre_score_guards = []
+        self._post_score_guards = []
+        self._prompt_column_name = None
+        self._response_column_name = None
+        self._custom_model_dir = os.path.dirname(guards_config_filename)
+
+        self._modifier_guard_seen = {stage: None for stage in GuardStage.ALL}
+        self.auto_generate_association_ids = False
+
+        # Dictionary of async http clients per process - its important to maintain
+        # this when moderation is running with CUSTOM_MODEL_WORKERS > 1
+        self.async_http_clients = {}
+
+        self.rouge_scorer = get_rouge_1_scorer()
+
+        with open(guards_config_filename) as f:
+            input_moderation_config = yaml.safe_load(f)
+
+        moderation_config = moderation_config_trafaret.check(input_moderation_config)
+        self.guard_timeout_sec = moderation_config["timeout_sec"]
+        self.guard_timeout_action = moderation_config["timeout_action"]
+        self.extra_model_output_for_chat_enabled = moderation_config.get(
+            "extra_model_output_for_chat_enabled", True
+        )
+        super().__init__(async_http_timeout_sec=self.guard_timeout_sec)
+
+        self._add_default_custom_metrics()
+        for guard_config in moderation_config["guards"]:
+            if isinstance(guard_config["stage"], list):
+                for stage in guard_config["stage"]:
+                    self._set_guard(guard_config, stage=stage)
+            else:
+                self._set_guard(guard_config)
+
+        self.create_custom_metrics_if_any()
+        if self._deployment:
+            self._prompt_column_name = self._deployment.model.get("prompt")
+            self._response_column_name = self._deployment.model["target_name"]
+        self._run_llm_in_parallel_with_pre_score_guards = False
+
+    def get_async_http_client(self):
+        # For each process we create one Async HTTP Client and any requests to
+        # that process will use that same client.
+        pid = os.getpid()
+        if pid not in self.async_http_clients:
+            self.async_http_clients[pid] = AsyncHTTPClient(self.guard_timeout_sec)
+
+        return self.async_http_clients[pid]
+
+    def _get_average_score_metric_definition(self, guard):
+        metric_definition = guard.get_average_score_metric(guard.stage)
+        if not guard.intervention:
+            return metric_definition
+
+        if guard.intervention.comparator not in [
+            GuardOperatorType.GREATER_THAN,
+            GuardOperatorType.LESS_THAN,
+        ]:
+            # For all other guard types, its not possible to define baseline value
+            return metric_definition
+
+        metric_definition["baselineValue"] = guard.intervention.threshold
+        if guard.intervention.comparator == GuardOperatorType.GREATER_THAN:
+            # if threshold is "greater", lower is better and vice-a-versa
+            metric_definition["directionality"] = CustomMetricDirectionality.LOWER_IS_BETTER
+        else:
+            metric_definition["directionality"] = CustomMetricDirectionality.HIGHER_IS_BETTER
+
+        return metric_definition
+
+    def _set_guard(self, guard_config, stage=None):
+        guard = GuardFactory().create(guard_config, stage=stage, model_dir=self._custom_model_dir)
+
+        guard_stage = stage if stage else guard.stage
+        intervention_action = guard.get_intervention_action()
+
+        if intervention_action == GuardAction.REPLACE:
+            if self._modifier_guard_seen[guard_stage]:
+                modifier_guard = self._modifier_guard_seen[guard_stage]
+                raise ValueError(
+                    "Cannot configure more than 1 modifier guards in the "
+                    f"{guard_config['stage']} stage, "
+                    f"guard {modifier_guard.name} already present"
+                )
+            else:
+                self._modifier_guard_seen[guard_stage] = guard
+        self._add_guard_to_pipeline(guard)
+        guard.set_pipeline(self)
+
+        if guard.has_average_score_custom_metric():
+            self.custom_metric_map[guard.get_average_score_custom_metric_name(guard_stage)] = {
+                "metric_definition": self._get_average_score_metric_definition(guard)
+            }
+
+        if guard.has_latency_custom_metric():
+            self.custom_metric_map[guard.get_latency_custom_metric_name()] = {
+                "metric_definition": guard.get_latency_custom_metric()
+            }
+
+        if intervention_action:
+            # Enforced metric for all kinds of guards, as long as they have intervention
+            # action defined - even for token count
+            self.custom_metric_map[
+                guard.get_guard_enforced_custom_metric_name(guard_stage, intervention_action)
+            ] = {
+                "metric_definition": guard.get_enforced_custom_metric(
+                    guard_stage, intervention_action
+                )
+            }
+        self.custom_metrics_no_association_ids.append(guard.get_latency_custom_metric_name())
+
+    def _add_default_custom_metrics(self):
+        """Default custom metrics"""
+        metric_list = [
+            get_total_custom_metric(GuardStage.PROMPT),
+            get_total_custom_metric(GuardStage.RESPONSE),
+            prescore_guard_latency_custom_metric,
+            postscore_guard_latency_custom_metric,
+            score_latency,
+        ]
+        # Metric list so far does not need association id for reporting
+        for metric in metric_list:
+            self.custom_metrics_no_association_ids.append(metric["name"])
+
+        metric_list.append(get_blocked_custom_metric(GuardStage.PROMPT))
+        metric_list.append(get_blocked_custom_metric(GuardStage.RESPONSE))
+        for metric in metric_list:
+            self.custom_metric_map[metric["name"]] = {"metric_definition": metric}
+
+    def _add_guard_to_pipeline(self, guard):
+        if guard.stage == GuardStage.PROMPT:
+            self._pre_score_guards.append(guard)
+        elif guard.stage == GuardStage.RESPONSE:
+            self._post_score_guards.append(guard)
+        else:
+            print("Ignoring invalid guard stage", guard.stage)
+
+    def report_stage_total_inputs(self, stage, num_rows):
+        if self.aggregate_custom_metric is None:
+            return
+
+        entry = self.aggregate_custom_metric[f"Total {get_stage_str(stage)}"]
+        self.set_custom_metrics_aggregate_entry(entry, num_rows)
+
+    def get_prescore_guards(self):
+        return self._pre_score_guards
+
+    def get_postscore_guards(self):
+        return self._post_score_guards
+
+    def report_stage_latency(self, latency_in_sec, stage):
+        if self.aggregate_custom_metric is None:
+            return
+
+        if stage == GuardStage.PROMPT:
+            metric_name = prescore_guard_latency_custom_metric["name"]
+        else:
+            metric_name = postscore_guard_latency_custom_metric["name"]
+        entry = self.aggregate_custom_metric[metric_name]
+        self.set_custom_metrics_aggregate_entry(entry, latency_in_sec)
+
+    def report_guard_latency(self, guard, latency_in_sec):
+        if guard is None or self.aggregate_custom_metric is None:
+            return
+
+        guard_latency_name = guard.get_latency_custom_metric_name()
+        entry = self.aggregate_custom_metric[guard_latency_name]
+        self.set_custom_metrics_aggregate_entry(entry, latency_in_sec)
+
+    def report_score_latency(self, latency_in_sec):
+        if self.aggregate_custom_metric is None:
+            return
+
+        entry = self.aggregate_custom_metric[score_latency["name"]]
+        self.set_custom_metrics_aggregate_entry(entry, latency_in_sec)
+
+    def get_input_column(self, stage):
+        if stage == GuardStage.PROMPT:
+            return (
+                self._prompt_column_name if self._prompt_column_name else DEFAULT_PROMPT_COLUMN_NAME
+            )
+        else:
+            # DRUM ensures that TARGET_NAME is always set as environment variable, but
+            # TARGET_NAME comes in double quotes, remove those
+            return (
+                self._response_column_name
+                if self._response_column_name
+                else (os.environ.get("TARGET_NAME").replace('"', ""))
+            )
+
+    def _set_custom_metrics_individual_entry(self, metric_id, value, association_id):
+        if isinstance(value, bool):
+            _value = 1.0 if value else 0.0
+        elif isinstance(value, np.bool_):
+            _value = 1.0 if value.item() else 0.0
+        elif isinstance(value, np.generic):
+            _value = value.item()
+        else:
+            _value = value
+        return {
+            "customMetricId": str(metric_id),
+            "value": _value,
+            "associationId": str(association_id),
+            "sampleSize": 1,
+            "timestamp": str(datetime.now(timezone.utc).isoformat()),
+        }
+
+    def get_enforced_column_name(self, guard, stage):
+        input_column = self.get_input_column(stage)
+        intervention_action = guard.get_intervention_action()
+        if intervention_action == GuardAction.REPLACE:
+            return f"{guard.name}_replaced_{input_column}"
+        else:
+            return f"{guard.name}_{intervention_action}ed_{input_column}"
+
+    def get_guard_specific_custom_metric_names(self, guard):
+        intervention_action = guard.get_intervention_action()
+        metric_list = []
+        if guard.has_average_score_custom_metric():
+            metric_list = [
+                (
+                    guard.get_average_score_custom_metric_name(guard.stage),
+                    guard.get_metric_column_name(guard.stage),
+                )
+            ]
+        if intervention_action:
+            metric_list.append(
+                (
+                    guard.get_guard_enforced_custom_metric_name(guard.stage, intervention_action),
+                    self.get_enforced_column_name(guard, guard.stage),
+                )
+            )
+        return metric_list
+
+    def _add_guard_specific_custom_metrics(self, row, guards):
+        if len(guards) == 0:
+            return []
+
+        association_id = row[self._association_id_column_name]
+
+        buckets = []
+        for guard in guards:
+            for metric_name, column_name in self.get_guard_specific_custom_metric_names(guard):
+                if column_name not in row:
+                    # It is possible metric column is missing if there is exception
+                    # executing the guard. Just continue with rest
+                    self._logger.warning(
+                        f"Missing {column_name} in result for guard {guard.name} "
+                        f"Not reporting the value with association id {association_id}"
+                    )
+                    continue
+                if math.isnan(row[column_name]):
+                    self._logger.warning(
+                        f"{column_name} in result is NaN for guard {guard.name} "
+                        f"Not reporting the value with association id {association_id}"
+                    )
+                    continue
+                custom_metric_id = self.custom_metric_map[metric_name].get("id")
+                if custom_metric_id is None:
+                    self._logger.warning(f"No metric id for '{metric_name}', not reporting")
+                    continue
+                bucket = self._set_custom_metrics_individual_entry(
+                    custom_metric_id, row[column_name], association_id
+                )
+                buckets.append(bucket)
+        return buckets
+
+    def _get_blocked_column_name_from_result_df(self, stage):
+        input_column_name = self.get_input_column(stage)
+        return f"blocked_{input_column_name}"
+
+    def _set_individual_custom_metrics_entries(self, result_df, payload):
+        for index, row in result_df.iterrows():
+            association_id = row[self._association_id_column_name]
+            for stage in GuardStage.ALL:
+                blocked_metric_name = f"Blocked {get_stage_str(stage)}"
+                blocked_column_name = self._get_blocked_column_name_from_result_df(stage)
+                if blocked_metric_name not in self.custom_metric_map:
+                    continue
+                if blocked_column_name not in result_df.columns:
+                    continue
+                if math.isnan(row[blocked_column_name]):
+                    # If prompt is blocked, response will be NaN, so don't report it
+                    continue
+                custom_metric_id = self.custom_metric_map[blocked_metric_name].get("id")
+                if custom_metric_id is None:
+                    self._logger.warning(f"No metric id for '{blocked_metric_name}', not reporting")
+                    continue
+                bucket = self._set_custom_metrics_individual_entry(
+                    custom_metric_id, row[blocked_column_name], association_id
+                )
+                payload["buckets"].append(bucket)
+
+            buckets = self._add_guard_specific_custom_metrics(row, self.get_prescore_guards())
+            payload["buckets"].extend(buckets)
+            buckets = self._add_guard_specific_custom_metrics(row, self.get_postscore_guards())
+            payload["buckets"].extend(buckets)
+
+    def report_custom_metrics(self, result_df):
+        if self.delayed_custom_metric_creation:
+            # Flag is not set yet, so no point reporting custom metrics
+            return
+
+        if self._association_id_column_name is None:
+            return
+
+        payload = {"buckets": []}
+
+        if self._association_id_column_name in result_df.columns:
+            # Custom metrics are reported only if the association id column
+            # is defined and is "present" in result_df
+            self._set_individual_custom_metrics_entries(result_df, payload)
+
+        # Ensure that "Total Prompts" and "Total Responses" are set properly too.
+        for stage in GuardStage.ALL:
+            entry = self.aggregate_custom_metric[f"Total {get_stage_str(stage)}"]
+            if "value" not in entry:
+                if stage == GuardStage.PROMPT:
+                    # If No prompt guards, then all entries are in Total Prompts
+                    self.set_custom_metrics_aggregate_entry(entry, result_df.shape[0])
+                    latency_entry = self.aggregate_custom_metric[
+                        prescore_guard_latency_custom_metric["name"]
+                    ]
+                    self.set_custom_metrics_aggregate_entry(latency_entry, 0.0)
+                else:
+                    # Prompt guards might have blocked some, so remaining will be
+                    # Total Responses
+                    blocked_column_name = self._get_blocked_column_name_from_result_df(
+                        GuardStage.PROMPT
+                    )
+                    value = result_df.shape[0] - ((result_df[blocked_column_name]).sum())
+                    self.set_custom_metrics_aggregate_entry(entry, value)
+                    latency_entry = self.aggregate_custom_metric[
+                        postscore_guard_latency_custom_metric["name"]
+                    ]
+                    self.set_custom_metrics_aggregate_entry(latency_entry, 0.0)
+
+        payload = self.add_aggregate_metrics_to_payload(payload)
+        self.upload_custom_metrics(payload)
+
+    async def send_event_async(self, title, message, event_type, guard_name=None, metric_name=None):
+        if self._deployment_id is None or self.async_http_client is None:
+            return
+
+        await self.async_http_client.async_report_event(
+            title,
+            message,
+            event_type,
+            self._deployment_id,
+            guard_name=guard_name,
+            metric_name=metric_name,
+        )
+
+    def agentic_metrics_configured(self):
+        if len(self.get_postscore_guards()) == 0:
+            # All Agentic metrics at response stage only
+            return False
+
+        for guard in self.get_postscore_guards():
+            if guard.is_agentic:
+                return True
+
+        return False