datarobot-moderations 11.1.20__py3-none-any.whl → 11.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datarobot_dome/drum_integration.py +15 -64
- datarobot_dome/metrics/citation_metrics.py +34 -0
- datarobot_dome/metrics/metric_scorer.py +5 -5
- datarobot_dome/pipeline/llm_pipeline.py +21 -48
- datarobot_dome/pipeline/pipeline.py +50 -16
- datarobot_dome/pipeline/vdb_pipeline.py +126 -18
- {datarobot_moderations-11.1.20.dist-info → datarobot_moderations-11.2.0.dist-info}/METADATA +1 -1
- {datarobot_moderations-11.1.20.dist-info → datarobot_moderations-11.2.0.dist-info}/RECORD +9 -9
- {datarobot_moderations-11.1.20.dist-info → datarobot_moderations-11.2.0.dist-info}/WHEEL +0 -0
datarobot_dome/drum_integration.py

@@ -22,6 +22,7 @@ from typing import Optional
 
 import numpy as np
 import pandas as pd
+import yaml
 from openai.types.chat import ChatCompletionChunk
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion import Choice
@@ -257,11 +258,6 @@ def run_user_score_function(filtered_df, model, pipeline, drum_score_fn, **kwarg
     return valid_predictions_df, none_predictions_df, score_latency
 
 
-def _auto_generate_association_ids(num_rows):
-    _logger.info(f"Auto generating {num_rows} association ids")
-    return [str(uuid.uuid4()) for _ in range(num_rows)]
-
-
 def guard_score_wrapper(data, model, pipeline, drum_score_fn, **kwargs):
     """
     Score wrapper function provided by the moderation library. DRUM will invoke this
@@ -291,7 +287,7 @@ def guard_score_wrapper(data, model, pipeline, drum_score_fn, **kwargs)
         and association_id_column_name not in data.columns
         and pipeline.auto_generate_association_ids
     ):
-        data[association_id_column_name] =
+        data[association_id_column_name] = pipeline.generate_association_ids(data.shape[0])
 
     # ==================================================================
     # Step 1: Prescore Guards processing
@@ -721,7 +717,7 @@ def guard_chat_wrapper(
     if association_id:
         data[association_id_column_name] = [association_id]
     elif pipeline.auto_generate_association_ids:
-        data[association_id_column_name] =
+        data[association_id_column_name] = pipeline.generate_association_ids(1)
     association_id = data[association_id_column_name].tolist()[0]
 
     # ==================================================================
@@ -849,63 +845,18 @@ def guard_chat_wrapper(
     )
 
 
-def run_vdb_score_function(
-    input_df: pd.DataFrame, model, pipeline: VDBPipeline, drum_score_fn, **kwargs
-) -> pd.DataFrame:
-    """
-    A wrapper to execute vdb's `score` method. Wrapper is useful to calculate the
-    latency of the `score` method and handle any exceptional conditions
-    Returns:
-        predictions_df: DataFrame obtained as a return value from user's `score`
-            method
-    """
-    start_time = time.time()
-
-    try:
-        predictions_df = drum_score_fn(input_df, model, **kwargs)
-    except Exception as e:
-        title = "Failed to execute vdb score function"
-        message = f"Exception: {e}"
-        _logger.error(title + " " + message)
-        pd.set_option("display.max_columns", None)
-        _logger.error(input_df)
-        pipeline.send_event_sync(
-            title, message, ModerationEventTypes.MODERATION_MODEL_SCORING_ERROR
-        )
-        raise
-
-    score_latency = time.time() - start_time
-    pipeline.record_score_latency(score_latency)
-    return predictions_df
-
-
-def vdb_score_wrapper(data: pd.DataFrame, model, pipeline: VDBPipeline, drum_score_fn, **kwargs):
-    """
-    Run on each prediction, and takes care of running the "score" function as well
-    as collecting the metrics for the VDB pipeline.
-    """
-    _logger.debug(data)
-
-    # clear/allocate memory for reporting metrics
-    pipeline.get_new_metrics_payload()
-
-    # NOTE: no "pre-score" calculation on the DataFrame for the predictions
-
-    # perform the main "score" function for this model
-    predictions_df = run_vdb_score_function(data, model, pipeline, drum_score_fn, **kwargs)
-
-    # loop through all the metrics scoring for the pipeline with predictions_df that has citations
-    for scorer in pipeline.scorers():
-        value = scorer.score(predictions_df)
-        pipeline.record_aggregate_value(scorer.name, value)
-
-    pipeline.report_custom_metrics()
-    return predictions_df
+def vdb_init(model_dir: str = os.getcwd()):
+    """Initializes a VDB pipeline."""
+    config = {}
 
+    config_file = os.path.join(model_dir, MODERATION_CONFIG_FILE_NAME)
+    if not os.path.exists(config_file):
+        _logger.info(f"No config file ({config_file}) found")
+    else:
+        with open(config_file) as fp:
+            config = yaml.safe_load(fp)
 
-def vdb_init():
-    """Initializes a VDB pipeline."""
-    return VDBPipeline()
+    return VDBPipeline(config)
 
 
 def init(model_dir: str = os.getcwd()):
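For orientation, a small usage sketch of the reworked initialization path (not part of the diff): the factory now threads model_dir through to vdb_init(), which looks for the package's MODERATION_CONFIG_FILE_NAME in that directory before building the pipeline. The directory path below is hypothetical.

    # Illustrative sketch only; "/opt/code" is a made-up model directory.
    from datarobot_dome.drum_integration import vdb_init

    # Falls back to an empty config dict if no moderation YAML file is found.
    pipeline = vdb_init(model_dir="/opt/code")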
@@ -992,7 +943,7 @@ class VdbModerationPipeline(ModerationPipeline):
 
     def score(self, data: pd.DataFrame, model, drum_score_fn, **kwargs):
         """Calls the VDB score function."""
-        return
+        return self._pipeline.score(data, model, drum_score_fn, **kwargs)
 
 
 def moderation_pipeline_factory(
@@ -1006,7 +957,7 @@ def moderation_pipeline_factory(
         return LlmModerationPipeline(pipeline)
 
     if target_type in TargetType.vdb():
-        pipeline = vdb_init()
+        pipeline = vdb_init(model_dir=model_dir)
        if pipeline:
            return VdbModerationPipeline(pipeline)
 
datarobot_dome/metrics/citation_metrics.py

@@ -29,6 +29,13 @@ class CitationTokenCountScorer(MetricScorer):
     BASELINE_VALUE = 0
     INPUT_COLUMN = CITATION_COLUMN
 
+    def score_rows(self, df: pd.DataFrame) -> list[float]:
+        column = self.input_column
+        if column not in df.columns:
+            return []
+
+        return [sum(get_token_count(v, self.encoding) for v in cell) for cell in df[column].values]
+
     def score(self, df: pd.DataFrame) -> float:
         column = self.input_column
         if column not in df.columns:
@@ -48,6 +55,19 @@ class CitationTokenAverageScorer(MetricScorer):
     BASELINE_VALUE = 0
     INPUT_COLUMN = CITATION_COLUMN
 
+    def score_rows(self, df: pd.DataFrame) -> []:
+        column = self.input_column
+        if column not in df.columns:
+            return []
+
+        averages = []
+        for cell in df[column].values:
+            total = sum(get_token_count(v, self.encoding) for v in cell)
+            count = sum(v != "" for v in cell)
+            averages.append(total / count)
+
+        return averages
+
     def score(self, df: pd.DataFrame) -> float:
         average = 0.0
         total = 0
@@ -73,6 +93,13 @@ class DocumentCountScorer(MetricScorer):
     BASELINE_VALUE = 0
     INPUT_COLUMN = CITATION_COLUMN
 
+    def score_rows(self, df: pd.DataFrame) -> list[float]:
+        column = self.input_column
+        if column not in df.columns:
+            return []
+
+        return [sum(bool(v) for v in cell) for cell in df[column].values]
+
     def score(self, df: pd.DataFrame) -> float:
         column = self.input_column
         if column not in df.columns:
@@ -90,6 +117,13 @@ class DocumentAverageScorer(MetricScorer):
     BASELINE_VALUE = 0
     INPUT_COLUMN = CITATION_COLUMN
 
+    def score_rows(self, df: pd.DataFrame) -> list[float]:
+        column = self.input_column
+        if column not in df.columns:
+            return []
+
+        return [sum(bool(v) for v in cell) for cell in df[column].values]
+
     def score(self, df: pd.DataFrame) -> float:
         column = self.input_column
         if column not in df.columns:
datarobot_dome/metrics/metric_scorer.py

@@ -61,10 +61,6 @@ class MetricScorer(ABC):
     def name(self) -> str:
         return self.config.get("name", self.NAME)
 
-    @property
-    def per_prediction(self) -> bool:
-        return self.config.get("per_prediction", False)
-
     @property
     def input_column(self) -> str:
         return self.config.get("input_column", self.INPUT_COLUMN)
@@ -75,4 +71,8 @@
 
     @abstractmethod
     def score(self, df: pd.DataFrame) -> float:
-        pass
+        pass # pragma: no cover
+
+    @abstractmethod
+    def score_rows(self, df: pd.DataFrame) -> list[float]:
+        pass # pragma: no cover
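For context on the new abstract contract (not part of the diff), a minimal concrete scorer might look like the sketch below. ExampleScorer, its column name, and its counting logic are hypothetical; only the score/score_rows signatures and the INPUT_COLUMN/input_column plumbing come from the diff.

    import pandas as pd

    from datarobot_dome.metrics.metric_scorer import MetricScorer


    class ExampleScorer(MetricScorer):  # hypothetical subclass, for illustration only
        NAME = "Example Document Count"
        BASELINE_VALUE = 0
        INPUT_COLUMN = "citations"  # assumed column name, not the package default

        def score_rows(self, df: pd.DataFrame) -> list[float]:
            # One value per prediction row, mirroring the citation scorers.
            column = self.input_column
            if column not in df.columns:
                return []
            return [float(len(cell)) for cell in df[column].values]

        def score(self, df: pd.DataFrame) -> float:
            # Single aggregate value for the whole DataFrame.
            rows = self.score_rows(df)
            return float(sum(rows)) if rows else float(self.BASELINE_VALUE)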
datarobot_dome/pipeline/llm_pipeline.py

@@ -12,10 +12,7 @@
 import logging
 import math
 import os
-from datetime import datetime
-from datetime import timezone
 
-import numpy as np
 import yaml
 from datarobot.enums import CustomMetricAggregationType
 from datarobot.enums import CustomMetricDirectionality
@@ -199,44 +196,37 @@ class LLMPipeline(Pipeline):
         guard.set_pipeline(self)
 
         if guard.has_average_score_custom_metric():
-            self.
-
-            }
+            metric_def = self._get_average_score_metric_definition(guard)
+            self.add_custom_metric(metric_def, True)
 
         if guard.has_latency_custom_metric():
-
-
-            }
+            metric_def = guard.get_latency_custom_metric()
+            self.add_custom_metric(metric_def, False)
 
         if intervention_action:
             # Enforced metric for all kinds of guards, as long as they have intervention
             # action defined - even for token count
-
-
-            ] = {
-                "metric_definition": guard.get_enforced_custom_metric(
-                    guard_stage, intervention_action
-                )
-            }
-            self.custom_metrics_no_association_ids.append(guard.get_latency_custom_metric_name())
+            metric_def = guard.get_enforced_custom_metric(guard_stage, intervention_action)
+            self.add_custom_metric(metric_def, True)
 
     def _add_default_custom_metrics(self):
         """Default custom metrics"""
-
+        # These metrics do not need association id for reporting
+        for metric_def in [
            get_total_custom_metric(GuardStage.PROMPT),
            get_total_custom_metric(GuardStage.RESPONSE),
            prescore_guard_latency_custom_metric,
            postscore_guard_latency_custom_metric,
            score_latency,
-        ]
-
-        for metric in metric_list:
-            self.custom_metrics_no_association_ids.append(metric["name"])
+        ]:
+            self.add_custom_metric(metric_def, False)
 
-
-
-
-
+        # These metrics report with an association-id
+        for metric_def in [
+            get_blocked_custom_metric(GuardStage.PROMPT),
+            get_blocked_custom_metric(GuardStage.RESPONSE),
+        ]:
+            self.add_custom_metric(metric_def, True)
 
     def _add_guard_to_pipeline(self, guard):
         if guard.stage == GuardStage.PROMPT:
@@ -299,23 +289,6 @@
             else (os.environ.get("TARGET_NAME").replace('"', ""))
         )
 
-    def _set_custom_metrics_individual_entry(self, metric_id, value, association_id):
-        if isinstance(value, bool):
-            _value = 1.0 if value else 0.0
-        elif isinstance(value, np.bool_):
-            _value = 1.0 if value.item() else 0.0
-        elif isinstance(value, np.generic):
-            _value = value.item()
-        else:
-            _value = value
-        return {
-            "customMetricId": str(metric_id),
-            "value": _value,
-            "associationId": str(association_id),
-            "sampleSize": 1,
-            "timestamp": str(datetime.now(timezone.utc).isoformat()),
-        }
-
     def get_enforced_column_name(self, guard, stage):
         input_column = self.get_input_column(stage)
         intervention_action = guard.get_intervention_action()
@@ -366,14 +339,14 @@
                     f"Not reporting the value with association id {association_id}"
                 )
                 continue
-            custom_metric_id = self.
+            custom_metric_id = self.custom_metric_id_from_name(metric_name)
             if custom_metric_id is None:
                 self._logger.warning(f"No metric id for '{metric_name}', not reporting")
                 continue
-
+            item = self.custom_metric_individual_payload(
                 custom_metric_id, row[column_name], association_id
             )
-            buckets.append(
+            buckets.append(item)
         return buckets
 
     def _get_blocked_column_name_from_result_df(self, stage):
@@ -393,11 +366,11 @@
             if math.isnan(row[blocked_column_name]):
                 # If prompt is blocked, response will be NaN, so don't report it
                 continue
-            custom_metric_id = self.
+            custom_metric_id = self.custom_metric_id_from_name(blocked_metric_name)
             if custom_metric_id is None:
                 self._logger.warning(f"No metric id for '{blocked_metric_name}', not reporting")
                 continue
-            bucket = self.
+            bucket = self.custom_metric_individual_payload(
                 custom_metric_id, row[blocked_column_name], association_id
             )
             payload["buckets"].append(bucket)
datarobot_dome/pipeline/pipeline.py

@@ -14,8 +14,11 @@ import logging
 import math
 import os
 import traceback
+import uuid
 from datetime import datetime
 from datetime import timezone
+from typing import Any
+from typing import Optional
 
 import datarobot as dr
 import numpy as np
@@ -48,11 +51,8 @@ class Pipeline:
         self._model_id = None
         self.async_http_client = None
         self._custom_metrics_bulk_upload_url = None
-        self._assoc_id_specific_custom_metric_ids = list()
         self.aggregate_custom_metric = None
         self.custom_metric_map = dict()
-        # List of custom metrics names which do not need the association id while reporting
-        self.custom_metrics_no_association_ids = list()
         self.delayed_custom_metric_creation = False
         self.upload_custom_metrics_tasks = set()
 
@@ -170,12 +170,27 @@
         self.create_custom_metrics()
         self.delayed_custom_metric_creation = False
 
+    def add_custom_metric(
+        self, metric_definition: dict[str, Any], requires_association_id: bool, **kwargs
+    ) -> None:
+        """
+        Adds an entry to the `custom_metric_map`.
+
+        NOTE: the kwargs allow implementations to add their own specialized values.
+        """
+        name = metric_definition["name"]
+        self.custom_metric_map[name] = {
+            "metric_definition": metric_definition,
+            "requires_association_id": requires_association_id,
+            **kwargs,
+        }
+
     def create_custom_metrics(self):
         """
         Creates all the custom-metrics in the DR app for an active deployment.
 
-
-
+        Updates the `custom_metric_map` with id's to insure the appropriate data
+        is put in place for reporting.
         """
         cleanup_metrics_list = list()
         for index, (metric_name, custom_metric) in enumerate(self.custom_metric_map.items()):
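To make the new registration path concrete (not part of the diff), the sketch below registers a metric definition by hand. Only the "name" and "isModelSpecific" keys are taken from what create_custom_metrics() reads in this diff; the values and the pipeline variable are placeholders.

    # Illustrative only: registering a metric through the new helper on an existing
    # Pipeline instance (here called `pipeline`, a placeholder).
    metric_definition = {"name": "Example Latency", "isModelSpecific": True}  # placeholder values
    pipeline.add_custom_metric(metric_definition, requires_association_id=False)

    # Subclasses can stash extra context via **kwargs, as VDBPipeline does with scorer_type:
    # pipeline.add_custom_metric(metric_definition, True, scorer_type=score_type)
    # create_custom_metrics() later fills in the "id" for each entry in custom_metric_map.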
@@ -196,9 +211,6 @@
                     is_model_specific=metric_definition["isModelSpecific"],
                 )
                 custom_metric["id"] = _metric_obj.id
-                custom_metric["requires_association_id"] = self._requires_association_id(
-                    metric_name
-                )
             except ClientError as e:
                 if e.status_code == 409:
                     if "not unique for deployment" in e.json["message"]:
@@ -261,9 +273,6 @@
                 self._logger.error(f"Metric '{metric_name}' exists at DR but not in moderation")
                 continue
             self.custom_metric_map[metric_name]["id"] = metric["id"]
-            self.custom_metric_map[metric_name]["requires_association_id"] = (
-                self._requires_association_id(metric_name)
-            )
 
         # These are the metrics we couldn't create - so, don't track them
         for metric_name in cleanup_metrics_list:
@@ -271,12 +280,33 @@
             self._logger.error(f"Skipping metric creation: {metric_name}")
             del self.custom_metric_map[metric_name]
 
-    def
-
+    def custom_metric_id_from_name(self, name: str) -> Optional[str]:
+        """Gets the custom-metric id from the name of a custom metric."""
+        identifier = self.custom_metric_map.get(name, {}).get("id")
+        return str(identifier) if identifier else None
 
-
-
-
+    def custom_metric_individual_payload(
+        self, metric_id: Any, value: Any, association_id: Any
+    ) -> dict[str, Any]:
+        """
+        Creates a dictionary for an individual custom-metric value, suitable to report
+        in the bulk upload (when surrounded by other stuff).
+        """
+        if isinstance(value, bool):
+            _value = 1.0 if value else 0.0
+        elif isinstance(value, np.bool_):
+            _value = 1.0 if value.item() else 0.0
+        elif isinstance(value, np.generic):
+            _value = value.item()
+        else:
+            _value = value
+        return {
+            "customMetricId": str(metric_id),
+            "value": _value,
+            "associationId": str(association_id),
+            "sampleSize": 1,
+            "timestamp": str(datetime.now(timezone.utc).isoformat()),
+        }
 
     @property
     def api_token(self):
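As a quick illustration of how the two new helpers compose when building bulk-upload buckets (not part of the diff; the metric name and values are placeholders):

    # Illustrative only: one bucket entry for the bulk-upload payload.
    metric_id = pipeline.custom_metric_id_from_name("Example Latency")  # placeholder name
    if metric_id is not None:
        bucket = pipeline.custom_metric_individual_payload(metric_id, 0.75, "row-0001")
        # bucket == {"customMetricId": "...", "value": 0.75, "associationId": "row-0001",
        #            "sampleSize": 1, "timestamp": "<UTC ISO-8601 string>"}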
@@ -285,6 +315,10 @@
     def get_association_id_column_name(self):
         return self._association_id_column_name
 
+    def generate_association_ids(self, num_rows: int) -> list[str]:
+        self._logger.info(f"Generating {num_rows} association ids")
+        return [str(uuid.uuid4()) for _ in range(num_rows)]
+
     def get_new_metrics_payload(self):
         """
         Resets the data for aggregate metrics reporting based on the `custom_metric_map`.
datarobot_dome/pipeline/vdb_pipeline.py

@@ -10,19 +10,24 @@
 # https://www.datarobot.com/wp-content/uploads/2021/07/DataRobot-Tool-and-Utility-Agreement.pdf.
 # ---------------------------------------------------------------------------------
 import logging
+import time
 from typing import Any
+from typing import Optional
 
+import pandas as pd
 from datarobot.enums import CustomMetricAggregationType
 from datarobot.enums import CustomMetricDirectionality
 
 from datarobot_dome.constants import CUSTOM_METRIC_DESCRIPTION_SUFFIX
 from datarobot_dome.constants import LOGGER_NAME_PREFIX
+from datarobot_dome.constants import ModerationEventTypes
 from datarobot_dome.metrics.factory import MetricScorerFactory
 from datarobot_dome.metrics.metric_scorer import MetricScorer
 from datarobot_dome.metrics.metric_scorer import ScorerType
 from datarobot_dome.pipeline.pipeline import Pipeline
 
 LATENCY_NAME = "VDB Score Latency"
+DEFAULT_PER_PREDICTION = True
 
 score_latency = {
     "name": LATENCY_NAME,
@@ -37,36 +42,35 @@ score_latency = {
 
 
 class VDBPipeline(Pipeline):
-    def __init__(self):
+    def __init__(self, config: Optional[dict[str, Any]] = None):
         super().__init__()
+        metric_config = config.get("metrics", {}) if config else {}
         self._score_configs: dict[ScorerType, dict[str, Any]] = {
-
-            ScorerType
-            ScorerType.DOCUMENT_AVERAGE: {},
-            ScorerType.DOCUMENT_COUNT: {},
+            stype.value: metric_config.get(stype.lower().replace("_", "-"), {})
+            for stype in ScorerType
         }
         self._scorers: list[MetricScorer] = list()
         self._logger = logging.getLogger(LOGGER_NAME_PREFIX + "." + self.__class__.__name__)
         self._add_default_custom_metrics()
         self.create_custom_metrics_if_any()
         self.create_scorers()
+        self.update_custom_metric_association_ids()
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({len(self.custom_metrics)} metrics)"
 
     def _add_default_custom_metrics(self):
         """Adds the default custom metrics based on the `_score_configs` map."""
         # create a list of tuples, so we can track the scorer type
-        metric_list = [(score_latency, None)]
+        metric_list = [(score_latency, False, None)]
         for score_type, score_config in self._score_configs.items():
-
-
+            metric_def = MetricScorerFactory.custom_metric_config(score_type, score_config)
+            per_row = score_config.get("per-prediction", DEFAULT_PER_PREDICTION)
+            metric_list.append((metric_def, per_row, score_type))
 
         # Metric list so far does not need association id for reporting
-        for
-
-            self.custom_metrics_no_association_ids.append(name)
-            self.custom_metric_map[name] = {
-                "metric_definition": metric_config,
-                "scorer_type": score_type,
-            }
+        for metric_def, per_row, score_type in metric_list:
+            self.add_custom_metric(metric_def, per_row, scorer_type=score_type)
 
     def create_scorers(self):
         """
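Based on how __init__ and _add_default_custom_metrics read the parsed YAML, the config handed to VDBPipeline appears to be keyed by dash-cased ScorerType names under a top-level "metrics" section, each with an optional "per-prediction" flag. A sketch of that assumed shape (not part of the diff; only DOCUMENT_COUNT and DOCUMENT_AVERAGE are visible as ScorerType members in the removed code, so the keys may not be exhaustive):

    # Assumed shape of the dict produced by yaml.safe_load() on the moderation config file.
    config = {
        "metrics": {
            "document-count": {"per-prediction": True},
            "document-average": {"per-prediction": False},  # aggregate-only reporting
        }
    }

    pipeline = VDBPipeline(config)  # vdb_init() builds this dict from the YAML file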
@@ -90,6 +94,18 @@
             scorer = MetricScorerFactory.create(score_type, score_config)
             self._scorers.append(scorer)
 
+    def update_custom_metric_association_ids(self):
+        """Update whether tracking per-prediction metrics based on deployment settings."""
+        has_assoc = bool(self._association_id_column_name)
+        for metric_name, metric_data in self.custom_metric_map.items():
+            score_type = metric_data.get("scorer_type")
+            if not score_type:
+                continue
+
+            scorer_config = self._score_configs.get(score_type, {})
+            per_assoc = scorer_config.get("per-prediction", DEFAULT_PER_PREDICTION)
+            metric_data["requires_association_id"] = has_assoc and per_assoc
+
     def scorers(self) -> list[MetricScorer]:
         """Get all scorers for this pipeline."""
         return self._scorers
@@ -109,11 +125,12 @@
         """Records aggregate latency metric value locally"""
         self.record_aggregate_value(LATENCY_NAME, latency_in_sec)
 
-    def report_custom_metrics(self):
+    def report_custom_metrics(self, individual_metrics: list[dict[str, Any]]) -> None:
         """
         Reports all the custom-metrics to DR app.
 
-        The bulk upload includes grabbing all the aggregated metrics
+        The bulk upload includes grabbing all the aggregated metrics, plus the list of
+        individual metric payloads.
         """
         if self.delayed_custom_metric_creation:
             # Flag is not set yet, so no point reporting custom metrics
@@ -123,5 +140,96 @@
             # in "test" mode, there is not a deployment and therefore no custom_metrics
             return
 
-        payload = self.add_aggregate_metrics_to_payload({"buckets":
+        payload = self.add_aggregate_metrics_to_payload({"buckets": individual_metrics})
         self.upload_custom_metrics(payload)
+
+    def run_model_score(
+        self, input_df: pd.DataFrame, model, drum_score_fn, **kwargs
+    ) -> pd.DataFrame:
+        """
+        A wrapper to execute vdb's `score` method. Wrapper is useful to calculate the
+        latency of the `score` method and handle any exceptional conditions
+        Returns:
+            predictions_df: DataFrame obtained as a return value from user's `score`
+                method
+        """
+        start_time = time.time()
+
+        try:
+            predictions_df = drum_score_fn(input_df, model, **kwargs)
+        except Exception as e:
+            title = "Failed to execute vdb score function"
+            message = f"Exception: {e}"
+            self._logger.error(title + " " + message)
+            pd.set_option("display.max_columns", None)
+            self._logger.error(input_df)
+            self.send_event_sync(
+                title, message, ModerationEventTypes.MODERATION_MODEL_SCORING_ERROR
+            )
+            raise
+
+        score_latency = time.time() - start_time
+        self.record_score_latency(score_latency)
+        return predictions_df
+
+    def score(self, data: pd.DataFrame, model, drum_score_fn, **kwargs):
+        """
+        Run on each prediction, and takes care of running the "score" function as well
+        as collecting the metrics.
+        """
+        self._logger.debug(data)
+
+        # clear/allocate memory for reporting metrics
+        self.get_new_metrics_payload()
+
+        # add the association-id if not present
+        association_id_column_name = self.get_association_id_column_name()
+        if (
+            association_id_column_name
+            and association_id_column_name not in data.columns
+            and self.auto_generate_association_ids
+        ):
+            data[association_id_column_name] = self.generate_association_ids(len(data))
+
+        # NOTE: no "pre-score" calculation on the DataFrame for the predictions
+
+        # perform the main "score" function for this model
+        predictions_df = self.run_model_score(data, model, drum_score_fn, **kwargs)
+
+        # make sure association ids get copied over
+        if (
+            association_id_column_name
+            and association_id_column_name not in predictions_df.columns
+            and association_id_column_name in data.columns
+        ):
+            predictions_df[association_id_column_name] = data[association_id_column_name]
+
+        # loop through all the metrics scoring with predictions_df that has citations
+        association_ids = (
+            []
+            if association_id_column_name not in predictions_df.columns
+            else predictions_df[association_id_column_name]
+        )
+        metric_reports = []
+        for scorer in self.scorers():
+            metric_info = self.custom_metric_map[scorer.name]
+            if metric_info.get("requires_association_id", False) and len(association_ids):
+                values = scorer.score_rows(predictions_df)
+                if not values:
+                    self.logger.debug(f"No {scorer} values found")
+                    continue
+
+                # assign back to the dataframe, so consumer has it
+                predictions_df[scorer.name] = values
+                metric_id = metric_info.get("id")
+                for association_id, value in zip(association_ids, values):
+                    metric_reports.append(
+                        self.custom_metric_individual_payload(metric_id, value, association_id)
+                    )
+                continue
+
+            value = scorer.score(predictions_df)
+            self.record_aggregate_value(scorer.name, value)
+
+        self.report_custom_metrics(metric_reports)
+        return predictions_df
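A hedged end-to-end sketch of the new per-prediction scoring flow (not part of the diff): user_score stands in for the model's DRUM score hook, and the "citations" column name is only an assumption for illustration; the real input column comes from each scorer's INPUT_COLUMN or config.

    # Illustrative only; constructing a VDBPipeline normally happens via vdb_init()
    # inside a deployment, so this sketch glosses over deployment wiring.
    import pandas as pd

    from datarobot_dome.pipeline.vdb_pipeline import VDBPipeline


    def user_score(df: pd.DataFrame, model, **kwargs) -> pd.DataFrame:
        # Stand-in for the user's DRUM score hook; real hooks return retrieved citations.
        return df.copy()


    pipeline = VDBPipeline(config=None)
    data = pd.DataFrame({"citations": [["doc a", "doc b"], ["doc c"]]})  # assumed column name
    predictions = pipeline.score(data, model=None, drum_score_fn=user_score)
    # Per-prediction scorers write their values back as new columns on `predictions`
    # and report them with association ids; the rest are reported as aggregates.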
{datarobot_moderations-11.1.20.dist-info → datarobot_moderations-11.2.0.dist-info}/RECORD

@@ -2,7 +2,7 @@ datarobot_dome/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,58
 datarobot_dome/async_http_client.py,sha256=wkB4irwvnchNGzO1bk2C_HWM-GOSB3AUn5TXKl-X0ZI,9649
 datarobot_dome/chat_helper.py,sha256=BzvtUyZSZxzOqq-5a2wQKhHhr2kMlcP1MFrHaDAeD_o,9671
 datarobot_dome/constants.py,sha256=vM2_JkXbn4dkWARCqxNfLriSo0E05LDXVrwNktptpuc,10416
-datarobot_dome/drum_integration.py,sha256=
+datarobot_dome/drum_integration.py,sha256=KvCtZKKlNbEFkjHEaEw9a3VSAtmx7miHWnxMpjB5Y0A,40487
 datarobot_dome/guard.py,sha256=1INYx17n9ToiB5bzI-jIReUUuqkK_ucxpOx4jQLts6g,33264
 datarobot_dome/guard_executor.py,sha256=AOI8MZeZETHMoFgBePe0wa2vE9d2975MYQnEDHLZL7s,35462
 datarobot_dome/guard_helpers.py,sha256=ajxm-w7MS7eN5DMMO-jbbzjcOYMZ-LvhO53n2NI5_Fk,16773
@@ -10,15 +10,15 @@ datarobot_dome/guards/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzI
 datarobot_dome/guards/guard_llm_mixin.py,sha256=VovlpNZjWIGamF4SSvLF5lzOFyApH5IoOiB_qtCmRg0,12216
 datarobot_dome/llm.py,sha256=L02OvTrflmD34-FrfXebfF-zzKTeuin7fpne1Cl5psg,5719
 datarobot_dome/metrics/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,583
-datarobot_dome/metrics/citation_metrics.py,sha256=
+datarobot_dome/metrics/citation_metrics.py,sha256=l2mnV1gz7nQeJ_yfaS4dcP3DFWf0p5QIBnKQ6shLnw4,4652
 datarobot_dome/metrics/factory.py,sha256=7caa8paI9LuFXDgguXdC4on28V7IwwIsKJT2Z-Aps8A,2187
-datarobot_dome/metrics/metric_scorer.py,sha256=
+datarobot_dome/metrics/metric_scorer.py,sha256=uJ_IJRw7ZFHueg8xjsaXbt0ypO7JiydZ0WapCp96yng,2540
 datarobot_dome/pipeline/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,583
-datarobot_dome/pipeline/llm_pipeline.py,sha256=
-datarobot_dome/pipeline/pipeline.py,sha256=
-datarobot_dome/pipeline/vdb_pipeline.py,sha256=
+datarobot_dome/pipeline/llm_pipeline.py,sha256=g7PAiLOMADr2DQFrtg2NrUj4u_tcvnoiJXrBR8xWsmY,18789
+datarobot_dome/pipeline/pipeline.py,sha256=fGMShAio18oDaz8hE-mO3QYdKbccb-qTIhgtLOLrdJs,17399
+datarobot_dome/pipeline/vdb_pipeline.py,sha256=q3c_Z-hGUqhH6j6n8VpS3wZiBIkWgpRDsBnyJyZhiw4,9855
 datarobot_dome/runtime.py,sha256=FD8wXOweqoQVzbZMh-mucL66xT2kGxPsJUGAcJBgwxw,1468
 datarobot_dome/streaming.py,sha256=6nYvh6SoxPRLfO6GGdEoHsQuyLP9oX1lDMe8IeGo4lw,17801
-datarobot_moderations-11.
-datarobot_moderations-11.
-datarobot_moderations-11.
+datarobot_moderations-11.2.0.dist-info/METADATA,sha256=dj-ypRVsYnUIYEpKZTnxaBQtBo8V99-dP6l7rTMIvow,4793
+datarobot_moderations-11.2.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+datarobot_moderations-11.2.0.dist-info/RECORD,,

{datarobot_moderations-11.1.20.dist-info → datarobot_moderations-11.2.0.dist-info}/WHEEL
File without changes