datarobot-moderations 11.1.22__py3-none-any.whl → 11.2.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry, and is provided for informational purposes only.
@@ -22,6 +22,7 @@ from typing import Optional
 
 import numpy as np
 import pandas as pd
+import yaml
 from openai.types.chat import ChatCompletionChunk
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion import Choice
@@ -257,11 +258,6 @@ def run_user_score_function(filtered_df, model, pipeline, drum_score_fn, **kwarg
     return valid_predictions_df, none_predictions_df, score_latency
 
 
-def _auto_generate_association_ids(num_rows):
-    _logger.info(f"Auto generating {num_rows} association ids")
-    return [str(uuid.uuid4()) for _ in range(num_rows)]
-
-
 def guard_score_wrapper(data, model, pipeline, drum_score_fn, **kwargs):
     """
     Score wrapper function provided by the moderation library. DRUM will invoke this
@@ -291,7 +287,7 @@ def guard_score_wrapper(data, model, pipeline, drum_score_fn, **kwargs):
         and association_id_column_name not in data.columns
         and pipeline.auto_generate_association_ids
     ):
-        data[association_id_column_name] = _auto_generate_association_ids(data.shape[0])
+        data[association_id_column_name] = pipeline.generate_association_ids(data.shape[0])
 
     # ==================================================================
     # Step 1: Prescore Guards processing
@@ -721,7 +717,7 @@ def guard_chat_wrapper(
     if association_id:
         data[association_id_column_name] = [association_id]
     elif pipeline.auto_generate_association_ids:
-        data[association_id_column_name] = _auto_generate_association_ids(1)
+        data[association_id_column_name] = pipeline.generate_association_ids(1)
     association_id = data[association_id_column_name].tolist()[0]
 
     # ==================================================================
@@ -849,63 +845,18 @@ def guard_chat_wrapper(
     )
 
 
-def run_vdb_score_function(
-    input_df: pd.DataFrame, model, pipeline, drum_score_fn, **kwargs
-) -> pd.DataFrame:
-    """
-    A wrapper to execute vdb's `score` method. Wrapper is useful to calculate the
-    latency of the `score` method and handle any exceptional conditions
-    Returns:
-        predictions_df: DataFrame obtained as a return value from user's `score`
-        method
-    """
-    start_time = time.time()
-
-    try:
-        predictions_df = drum_score_fn(input_df, model, **kwargs)
-    except Exception as e:
-        title = "Failed to execute vdb score function"
-        message = f"Exception: {e}"
-        _logger.error(title + " " + message)
-        pd.set_option("display.max_columns", None)
-        _logger.error(input_df)
-        pipeline.send_event_sync(
-            title, message, ModerationEventTypes.MODERATION_MODEL_SCORING_ERROR
-        )
-        raise
-
-    score_latency = time.time() - start_time
-    pipeline.record_score_latency(score_latency)
-    return predictions_df
-
-
-def vdb_score_wrapper(data: pd.DataFrame, model, pipeline: VDBPipeline, drum_score_fn, **kwargs):
-    """
-    Run on each prediction, and takes care of running the "score" function as well
-    as collecting the metrics for the VDB pipeline.
-    """
-    _logger.debug(data)
-
-    # clear/allocate memory for reporting metrics
-    pipeline.get_new_metrics_payload()
-
-    # NOTE: no "pre-score" calculation on the DataFrame for the predictions
-
-    # perform the main "score" function for this model
-    predictions_df = run_vdb_score_function(data, model, pipeline, drum_score_fn, **kwargs)
-
-    # loop through all the metrics scoring for the pipeline with predictions_df that has citations
-    for scorer in pipeline.scorers():
-        value = scorer.score(predictions_df)
-        pipeline.record_aggregate_value(scorer.name, value)
-
-    pipeline.report_custom_metrics()
-    return predictions_df
+def vdb_init(model_dir: str = os.getcwd()):
+    """Initializes a VDB pipeline."""
+    config = {}
 
+    config_file = os.path.join(model_dir, MODERATION_CONFIG_FILE_NAME)
+    if not os.path.exists(config_file):
+        _logger.info(f"No config file ({config_file}) found")
+    else:
+        with open(config_file) as fp:
+            config = yaml.safe_load(fp)
 
-def vdb_init():
-    """Initializes a VDB pipeline."""
-    return VDBPipeline()
+    return VDBPipeline(config)
 
 
 def init(model_dir: str = os.getcwd()):
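Note: `vdb_init` now looks for an optional moderation config file (named by the `MODERATION_CONFIG_FILE_NAME` constant, whose value is not shown in this diff) in the model directory and passes the parsed YAML mapping to `VDBPipeline`. The sketch below shows the general shape that mapping appears to take, inferred from how `VDBPipeline.__init__` reads `config["metrics"]` and a per-scorer `per-prediction` flag; the dashed metric keys are assumptions, not confirmed by this diff.

    # Hypothetical shape of the parsed moderation config handed to VDBPipeline;
    # the dashed metric keys and "per-prediction" flag are inferred from
    # VDBPipeline.__init__ further down in this diff.
    config = {
        "metrics": {
            "citation-token-count": {"per-prediction": True},
            "document-count": {"per-prediction": False},
        }
    }
    # vdb_init() would then return VDBPipeline(config)
    print(config["metrics"].get("citation-token-count", {}))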
@@ -992,7 +943,7 @@ class VdbModerationPipeline(ModerationPipeline):
 
     def score(self, data: pd.DataFrame, model, drum_score_fn, **kwargs):
         """Calls the VDB score function."""
-        return vdb_score_wrapper(data, model, self._pipeline, drum_score_fn, **kwargs)
+        return self._pipeline.score(data, model, drum_score_fn, **kwargs)
 
 
 def moderation_pipeline_factory(
@@ -1007,7 +958,7 @@ def moderation_pipeline_factory(
         return LlmModerationPipeline(pipeline)
 
     if target_type in TargetType.vdb():
-        pipeline = vdb_init()
+        pipeline = vdb_init(model_dir=model_dir)
         if pipeline:
             return VdbModerationPipeline(pipeline)
 
@@ -29,6 +29,13 @@ class CitationTokenCountScorer(MetricScorer):
     BASELINE_VALUE = 0
     INPUT_COLUMN = CITATION_COLUMN
 
+    def score_rows(self, df: pd.DataFrame) -> list[float]:
+        column = self.input_column
+        if column not in df.columns:
+            return []
+
+        return [sum(get_token_count(v, self.encoding) for v in cell) for cell in df[column].values]
+
     def score(self, df: pd.DataFrame) -> float:
         column = self.input_column
         if column not in df.columns:
@@ -48,6 +55,19 @@ class CitationTokenAverageScorer(MetricScorer):
     BASELINE_VALUE = 0
     INPUT_COLUMN = CITATION_COLUMN
 
+    def score_rows(self, df: pd.DataFrame) -> []:
+        column = self.input_column
+        if column not in df.columns:
+            return []
+
+        averages = []
+        for cell in df[column].values:
+            total = sum(get_token_count(v, self.encoding) for v in cell)
+            count = sum(v != "" for v in cell)
+            averages.append(total / count)
+
+        return averages
+
     def score(self, df: pd.DataFrame) -> float:
         average = 0.0
         total = 0
@@ -73,6 +93,13 @@ class DocumentCountScorer(MetricScorer):
     BASELINE_VALUE = 0
     INPUT_COLUMN = CITATION_COLUMN
 
+    def score_rows(self, df: pd.DataFrame) -> list[float]:
+        column = self.input_column
+        if column not in df.columns:
+            return []
+
+        return [sum(bool(v) for v in cell) for cell in df[column].values]
+
     def score(self, df: pd.DataFrame) -> float:
         column = self.input_column
         if column not in df.columns:
@@ -90,6 +117,13 @@ class DocumentAverageScorer(MetricScorer):
     BASELINE_VALUE = 0
     INPUT_COLUMN = CITATION_COLUMN
 
+    def score_rows(self, df: pd.DataFrame) -> list[float]:
+        column = self.input_column
+        if column not in df.columns:
+            return []
+
+        return [sum(bool(v) for v in cell) for cell in df[column].values]
+
     def score(self, df: pd.DataFrame) -> float:
         column = self.input_column
         if column not in df.columns:
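To make the new per-row methods concrete, the snippet below reproduces the `DocumentCountScorer.score_rows` expression on a toy DataFrame: each cell of the citation column is assumed to hold a list of citation texts, and one value is returned per row. The real scorers also need the package's config and token-encoding machinery, so this is only an illustration, and the column name here is a placeholder.

    import pandas as pd

    # Toy frame; the real column name comes from the CITATION_COLUMN constant.
    df = pd.DataFrame({"citations": [["doc a", "doc b"], ["doc c"], []]})

    # Same per-row expression as DocumentCountScorer.score_rows:
    doc_counts = [sum(bool(v) for v in cell) for cell in df["citations"].values]
    print(doc_counts)  # [2, 1, 0]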
@@ -61,10 +61,6 @@ class MetricScorer(ABC):
     def name(self) -> str:
         return self.config.get("name", self.NAME)
 
-    @property
-    def per_prediction(self) -> bool:
-        return self.config.get("per_prediction", False)
-
     @property
     def input_column(self) -> str:
         return self.config.get("input_column", self.INPUT_COLUMN)
@@ -75,4 +71,8 @@ class MetricScorer(ABC):
 
     @abstractmethod
     def score(self, df: pd.DataFrame) -> float:
-        pass
+        pass  # pragma: no cover
+
+    @abstractmethod
+    def score_rows(self, df: pd.DataFrame) -> list[float]:
+        pass  # pragma: no cover
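Since `score_rows` now joins `score` as an abstract method, every concrete scorer has to provide both an aggregate value and a per-row list. Below is a minimal sketch of a class shaped like that contract, with a simplified constructor-free setup; the real `MetricScorer` base carries config, name, and input-column handling that this hunk does not show.

    import pandas as pd

    class ToyCitationCountScorer:
        """Illustrative scorer following the score/score_rows contract."""

        INPUT_COLUMN = "citations"  # placeholder; real scorers use CITATION_COLUMN

        def score_rows(self, df: pd.DataFrame) -> list[float]:
            if self.INPUT_COLUMN not in df.columns:
                return []
            return [float(len(cell)) for cell in df[self.INPUT_COLUMN].values]

        def score(self, df: pd.DataFrame) -> float:
            # aggregate as the sum of the per-row values
            return float(sum(self.score_rows(df)))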
@@ -12,10 +12,7 @@
 import logging
 import math
 import os
-from datetime import datetime
-from datetime import timezone
 
-import numpy as np
 import yaml
 from datarobot.enums import CustomMetricAggregationType
 from datarobot.enums import CustomMetricDirectionality
@@ -199,44 +196,37 @@ class LLMPipeline(Pipeline):
         guard.set_pipeline(self)
 
         if guard.has_average_score_custom_metric():
-            self.custom_metric_map[guard.get_average_score_custom_metric_name(guard_stage)] = {
-                "metric_definition": self._get_average_score_metric_definition(guard)
-            }
+            metric_def = self._get_average_score_metric_definition(guard)
+            self.add_custom_metric(metric_def, True)
 
         if guard.has_latency_custom_metric():
-            self.custom_metric_map[guard.get_latency_custom_metric_name()] = {
-                "metric_definition": guard.get_latency_custom_metric()
-            }
+            metric_def = guard.get_latency_custom_metric()
+            self.add_custom_metric(metric_def, False)
 
         if intervention_action:
             # Enforced metric for all kinds of guards, as long as they have intervention
             # action defined - even for token count
-            self.custom_metric_map[
-                guard.get_guard_enforced_custom_metric_name(guard_stage, intervention_action)
-            ] = {
-                "metric_definition": guard.get_enforced_custom_metric(
-                    guard_stage, intervention_action
-                )
-            }
-            self.custom_metrics_no_association_ids.append(guard.get_latency_custom_metric_name())
+            metric_def = guard.get_enforced_custom_metric(guard_stage, intervention_action)
+            self.add_custom_metric(metric_def, True)
 
     def _add_default_custom_metrics(self):
         """Default custom metrics"""
-        metric_list = [
+        # These metrics do not need association id for reporting
+        for metric_def in [
             get_total_custom_metric(GuardStage.PROMPT),
             get_total_custom_metric(GuardStage.RESPONSE),
             prescore_guard_latency_custom_metric,
             postscore_guard_latency_custom_metric,
             score_latency,
-        ]
-        # Metric list so far does not need association id for reporting
-        for metric in metric_list:
-            self.custom_metrics_no_association_ids.append(metric["name"])
+        ]:
+            self.add_custom_metric(metric_def, False)
 
-        metric_list.append(get_blocked_custom_metric(GuardStage.PROMPT))
-        metric_list.append(get_blocked_custom_metric(GuardStage.RESPONSE))
-        for metric in metric_list:
-            self.custom_metric_map[metric["name"]] = {"metric_definition": metric}
+        # These metrics report with an association-id
+        for metric_def in [
+            get_blocked_custom_metric(GuardStage.PROMPT),
+            get_blocked_custom_metric(GuardStage.RESPONSE),
+        ]:
+            self.add_custom_metric(metric_def, True)
 
     def _add_guard_to_pipeline(self, guard):
         if guard.stage == GuardStage.PROMPT:
@@ -299,23 +289,6 @@ class LLMPipeline(Pipeline):
             else (os.environ.get("TARGET_NAME").replace('"', ""))
         )
 
-    def _set_custom_metrics_individual_entry(self, metric_id, value, association_id):
-        if isinstance(value, bool):
-            _value = 1.0 if value else 0.0
-        elif isinstance(value, np.bool_):
-            _value = 1.0 if value.item() else 0.0
-        elif isinstance(value, np.generic):
-            _value = value.item()
-        else:
-            _value = value
-        return {
-            "customMetricId": str(metric_id),
-            "value": _value,
-            "associationId": str(association_id),
-            "sampleSize": 1,
-            "timestamp": str(datetime.now(timezone.utc).isoformat()),
-        }
-
     def get_enforced_column_name(self, guard, stage):
         input_column = self.get_input_column(stage)
         intervention_action = guard.get_intervention_action()
@@ -366,14 +339,14 @@ class LLMPipeline(Pipeline):
                     f"Not reporting the value with association id {association_id}"
                 )
                 continue
-            custom_metric_id = self.custom_metric_map[metric_name].get("id")
+            custom_metric_id = self.custom_metric_id_from_name(metric_name)
             if custom_metric_id is None:
                 self._logger.warning(f"No metric id for '{metric_name}', not reporting")
                 continue
-            bucket = self._set_custom_metrics_individual_entry(
+            item = self.custom_metric_individual_payload(
                 custom_metric_id, row[column_name], association_id
             )
-            buckets.append(bucket)
+            buckets.append(item)
         return buckets
 
     def _get_blocked_column_name_from_result_df(self, stage):
@@ -393,11 +366,11 @@ class LLMPipeline(Pipeline):
             if math.isnan(row[blocked_column_name]):
                 # If prompt is blocked, response will be NaN, so don't report it
                 continue
-            custom_metric_id = self.custom_metric_map[blocked_metric_name].get("id")
+            custom_metric_id = self.custom_metric_id_from_name(blocked_metric_name)
             if custom_metric_id is None:
                 self._logger.warning(f"No metric id for '{blocked_metric_name}', not reporting")
                 continue
-            bucket = self._set_custom_metrics_individual_entry(
+            bucket = self.custom_metric_individual_payload(
                 custom_metric_id, row[blocked_column_name], association_id
             )
             payload["buckets"].append(bucket)
@@ -14,8 +14,11 @@ import logging
 import math
 import os
 import traceback
+import uuid
 from datetime import datetime
 from datetime import timezone
+from typing import Any
+from typing import Optional
 
 import datarobot as dr
 import numpy as np
@@ -48,11 +51,8 @@ class Pipeline:
         self._model_id = None
         self.async_http_client = None
         self._custom_metrics_bulk_upload_url = None
-        self._assoc_id_specific_custom_metric_ids = list()
         self.aggregate_custom_metric = None
         self.custom_metric_map = dict()
-        # List of custom metrics names which do not need the association id while reporting
-        self.custom_metrics_no_association_ids = list()
         self.delayed_custom_metric_creation = False
         self.upload_custom_metrics_tasks = set()
 
@@ -170,12 +170,27 @@ class Pipeline:
             self.create_custom_metrics()
             self.delayed_custom_metric_creation = False
 
+    def add_custom_metric(
+        self, metric_definition: dict[str, Any], requires_association_id: bool, **kwargs
+    ) -> None:
+        """
+        Adds an entry to the `custom_metric_map`.
+
+        NOTE: the kwargs allow implementations to add their own specialized values.
+        """
+        name = metric_definition["name"]
+        self.custom_metric_map[name] = {
+            "metric_definition": metric_definition,
+            "requires_association_id": requires_association_id,
+            **kwargs,
+        }
+
     def create_custom_metrics(self):
         """
         Creates all the custom-metrics in the DR app for an active deployment.
 
-        The `custom_metric_map` and `_requires_association_id` attributes are consulted to
-        insure the appropriate data is put in place for reporting.
+        Updates the `custom_metric_map` with id's to insure the appropriate data
+        is put in place for reporting.
         """
         cleanup_metrics_list = list()
         for index, (metric_name, custom_metric) in enumerate(self.custom_metric_map.items()):
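The `add_custom_metric` helper replaces the scattered writes to `custom_metric_map` and the old `custom_metrics_no_association_ids` list: each metric now carries its own `requires_association_id` flag, plus any extra bookkeeping passed as kwargs (such as the VDB pipeline's `scorer_type`). Below is a standalone mirror of what it stores, using stand-in metric definitions; real definitions include more fields, e.g. directionality and aggregation type.

    from typing import Any

    custom_metric_map: dict[str, dict[str, Any]] = {}

    def add_custom_metric(metric_definition: dict[str, Any], requires_association_id: bool, **kwargs) -> None:
        # Mirrors Pipeline.add_custom_metric: key the entry by the metric name.
        custom_metric_map[metric_definition["name"]] = {
            "metric_definition": metric_definition,
            "requires_association_id": requires_association_id,
            **kwargs,
        }

    add_custom_metric({"name": "Example Blocked Count"}, True)
    add_custom_metric({"name": "Example Latency"}, False)
    print(custom_metric_map["Example Latency"]["requires_association_id"])  # False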
@@ -196,14 +211,11 @@ class Pipeline:
                     is_model_specific=metric_definition["isModelSpecific"],
                 )
                 custom_metric["id"] = _metric_obj.id
-                custom_metric["requires_association_id"] = self._requires_association_id(
-                    metric_name
-                )
             except ClientError as e:
                 if e.status_code == 409:
                     if "not unique for deployment" in e.json["message"]:
                         # Duplicate entry nothing to worry - just continue
-                        self._logger.error(f"Metric '{metric_name}' already exists, skipping")
+                        self._logger.warning(f"Metric '{metric_name}' already exists, skipping")
                         continue
                     elif e.json["message"].startswith("Maximum number of custom metrics reached"):
                         # Reached the limit - we can't create more
@@ -261,9 +273,6 @@ class Pipeline:
                 self._logger.error(f"Metric '{metric_name}' exists at DR but not in moderation")
                 continue
             self.custom_metric_map[metric_name]["id"] = metric["id"]
-            self.custom_metric_map[metric_name]["requires_association_id"] = (
-                self._requires_association_id(metric_name)
-            )
 
         # These are the metrics we couldn't create - so, don't track them
         for metric_name in cleanup_metrics_list:
@@ -271,12 +280,33 @@ class Pipeline:
             self._logger.error(f"Skipping metric creation: {metric_name}")
             del self.custom_metric_map[metric_name]
 
-    def _requires_association_id(self, metric_name):
-        return metric_name not in self.custom_metrics_no_association_ids
+    def custom_metric_id_from_name(self, name: str) -> Optional[str]:
+        """Gets the custom-metric id from the name of a custom metric."""
+        identifier = self.custom_metric_map.get(name, {}).get("id")
+        return str(identifier) if identifier else None
 
-    @property
-    def prediction_url(self):
-        return self._datarobot_url
+    def custom_metric_individual_payload(
+        self, metric_id: Any, value: Any, association_id: Any
+    ) -> dict[str, Any]:
+        """
+        Creates a dictionary for an individual custom-metric value, suitable to report
+        in the bulk upload (when surrounded by other stuff).
+        """
+        if isinstance(value, bool):
+            _value = 1.0 if value else 0.0
+        elif isinstance(value, np.bool_):
+            _value = 1.0 if value.item() else 0.0
+        elif isinstance(value, np.generic):
+            _value = value.item()
+        else:
+            _value = value
+        return {
+            "customMetricId": str(metric_id),
+            "value": _value,
+            "associationId": str(association_id),
+            "sampleSize": 1,
+            "timestamp": str(datetime.now(timezone.utc).isoformat()),
+        }
 
     @property
     def api_token(self):
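For reference, `custom_metric_individual_payload` is the old `_set_custom_metrics_individual_entry` promoted to the shared `Pipeline` base, so both the LLM and VDB pipelines can build per-value entries for the bulk upload. Below is a condensed standalone sketch of the same value normalization and payload shape; the bool/np.bool_ branches are merged here for brevity.

    from datetime import datetime, timezone

    import numpy as np

    def individual_payload(metric_id, value, association_id):
        # Booleans (plain or numpy) become 1.0/0.0; numpy scalars become Python scalars.
        if isinstance(value, (bool, np.bool_)):
            value = 1.0 if bool(value) else 0.0
        elif isinstance(value, np.generic):
            value = value.item()
        return {
            "customMetricId": str(metric_id),
            "value": value,
            "associationId": str(association_id),
            "sampleSize": 1,
            "timestamp": str(datetime.now(timezone.utc).isoformat()),
        }

    print(individual_payload("abc123", np.float64(0.75), "row-1")["value"])  # 0.75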
@@ -285,6 +315,10 @@ class Pipeline:
     def get_association_id_column_name(self):
         return self._association_id_column_name
 
+    def generate_association_ids(self, num_rows: int) -> list[str]:
+        self._logger.info(f"Generating {num_rows} association ids")
+        return [str(uuid.uuid4()) for _ in range(num_rows)]
+
     def get_new_metrics_payload(self):
         """
         Resets the data for aggregate metrics reporting based on the `custom_metric_map`.
@@ -10,19 +10,24 @@
 # https://www.datarobot.com/wp-content/uploads/2021/07/DataRobot-Tool-and-Utility-Agreement.pdf.
 # ---------------------------------------------------------------------------------
 import logging
+import time
 from typing import Any
+from typing import Optional
 
+import pandas as pd
 from datarobot.enums import CustomMetricAggregationType
 from datarobot.enums import CustomMetricDirectionality
 
 from datarobot_dome.constants import CUSTOM_METRIC_DESCRIPTION_SUFFIX
 from datarobot_dome.constants import LOGGER_NAME_PREFIX
+from datarobot_dome.constants import ModerationEventTypes
 from datarobot_dome.metrics.factory import MetricScorerFactory
 from datarobot_dome.metrics.metric_scorer import MetricScorer
 from datarobot_dome.metrics.metric_scorer import ScorerType
 from datarobot_dome.pipeline.pipeline import Pipeline
 
 LATENCY_NAME = "VDB Score Latency"
+DEFAULT_PER_PREDICTION = True
 
 score_latency = {
     "name": LATENCY_NAME,
@@ -37,36 +42,35 @@ score_latency = {
 
 
 class VDBPipeline(Pipeline):
-    def __init__(self):
+    def __init__(self, config: Optional[dict[str, Any]] = None):
         super().__init__()
+        metric_config = config.get("metrics", {}) if config else {}
         self._score_configs: dict[ScorerType, dict[str, Any]] = {
-            ScorerType.CITATION_TOKEN_AVERAGE: {},
-            ScorerType.CITATION_TOKEN_COUNT: {},
-            ScorerType.DOCUMENT_AVERAGE: {},
-            ScorerType.DOCUMENT_COUNT: {},
+            stype.value: metric_config.get(stype.lower().replace("_", "-"), {})
+            for stype in ScorerType
         }
         self._scorers: list[MetricScorer] = list()
         self._logger = logging.getLogger(LOGGER_NAME_PREFIX + "." + self.__class__.__name__)
         self._add_default_custom_metrics()
         self.create_custom_metrics_if_any()
         self.create_scorers()
+        self.update_custom_metric_association_ids()
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({len(self.custom_metrics)} metrics)"
 
     def _add_default_custom_metrics(self):
         """Adds the default custom metrics based on the `_score_configs` map."""
         # create a list of tuples, so we can track the scorer type
-        metric_list = [(score_latency, None)]
+        metric_list = [(score_latency, False, None)]
         for score_type, score_config in self._score_configs.items():
-            metric_config = MetricScorerFactory.custom_metric_config(score_type, score_config)
-            metric_list.append((metric_config, score_type))
+            metric_def = MetricScorerFactory.custom_metric_config(score_type, score_config)
+            per_row = score_config.get("per-prediction", DEFAULT_PER_PREDICTION)
+            metric_list.append((metric_def, per_row, score_type))
 
         # Metric list so far does not need association id for reporting
-        for metric_config, score_type in metric_list:
-            name = metric_config["name"]
-            self.custom_metrics_no_association_ids.append(name)
-            self.custom_metric_map[name] = {
-                "metric_definition": metric_config,
-                "scorer_type": score_type,
-            }
+        for metric_def, per_row, score_type in metric_list:
+            self.add_custom_metric(metric_def, per_row, scorer_type=score_type)
 
     def create_scorers(self):
         """
@@ -90,6 +94,18 @@ class VDBPipeline(Pipeline):
             scorer = MetricScorerFactory.create(score_type, score_config)
             self._scorers.append(scorer)
 
+    def update_custom_metric_association_ids(self):
+        """Update whether tracking per-prediction metrics based on deployment settings."""
+        has_assoc = bool(self._association_id_column_name)
+        for metric_name, metric_data in self.custom_metric_map.items():
+            score_type = metric_data.get("scorer_type")
+            if not score_type:
+                continue
+
+            scorer_config = self._score_configs.get(score_type, {})
+            per_assoc = scorer_config.get("per-prediction", DEFAULT_PER_PREDICTION)
+            metric_data["requires_association_id"] = has_assoc and per_assoc
+
     def scorers(self) -> list[MetricScorer]:
         """Get all scorers for this pipeline."""
         return self._scorers
@@ -109,11 +125,12 @@ class VDBPipeline(Pipeline):
         """Records aggregate latency metric value locally"""
         self.record_aggregate_value(LATENCY_NAME, latency_in_sec)
 
-    def report_custom_metrics(self):
+    def report_custom_metrics(self, individual_metrics: list[dict[str, Any]]) -> None:
         """
         Reports all the custom-metrics to DR app.
 
-        The bulk upload includes grabbing all the aggregated metrics.
+        The bulk upload includes grabbing all the aggregated metrics, plus the list of
+        individual metric payloads.
         """
         if self.delayed_custom_metric_creation:
             # Flag is not set yet, so no point reporting custom metrics
@@ -123,5 +140,96 @@ class VDBPipeline(Pipeline):
             # in "test" mode, there is not a deployment and therefore no custom_metrics
             return
 
-        payload = self.add_aggregate_metrics_to_payload({"buckets": []})
+        payload = self.add_aggregate_metrics_to_payload({"buckets": individual_metrics})
         self.upload_custom_metrics(payload)
+
+    def run_model_score(
+        self, input_df: pd.DataFrame, model, drum_score_fn, **kwargs
+    ) -> pd.DataFrame:
+        """
+        A wrapper to execute vdb's `score` method. Wrapper is useful to calculate the
+        latency of the `score` method and handle any exceptional conditions
+        Returns:
+            predictions_df: DataFrame obtained as a return value from user's `score`
+            method
+        """
+        start_time = time.time()
+
+        try:
+            predictions_df = drum_score_fn(input_df, model, **kwargs)
+        except Exception as e:
+            title = "Failed to execute vdb score function"
+            message = f"Exception: {e}"
+            self._logger.error(title + " " + message)
+            pd.set_option("display.max_columns", None)
+            self._logger.error(input_df)
+            self.send_event_sync(
+                title, message, ModerationEventTypes.MODERATION_MODEL_SCORING_ERROR
+            )
+            raise
+
+        score_latency = time.time() - start_time
+        self.record_score_latency(score_latency)
+        return predictions_df
+
+    def score(self, data: pd.DataFrame, model, drum_score_fn, **kwargs):
+        """
+        Run on each prediction, and takes care of running the "score" function as well
+        as collecting the metrics.
+        """
+        self._logger.debug(data)
+
+        # clear/allocate memory for reporting metrics
+        self.get_new_metrics_payload()
+
+        # add the association-id if not present
+        association_id_column_name = self.get_association_id_column_name()
+        if (
+            association_id_column_name
+            and association_id_column_name not in data.columns
+            and self.auto_generate_association_ids
+        ):
+            data[association_id_column_name] = self.generate_association_ids(len(data))
+
+        # NOTE: no "pre-score" calculation on the DataFrame for the predictions
+
+        # perform the main "score" function for this model
+        predictions_df = self.run_model_score(data, model, drum_score_fn, **kwargs)
+
+        # make sure association ids get copied over
+        if (
+            association_id_column_name
+            and association_id_column_name not in predictions_df.columns
+            and association_id_column_name in data.columns
+        ):
+            predictions_df[association_id_column_name] = data[association_id_column_name]
+
+        # loop through all the metrics scoring with predictions_df that has citations
+        association_ids = (
+            []
+            if association_id_column_name not in predictions_df.columns
+            else predictions_df[association_id_column_name]
+        )
+        metric_reports = []
+        for scorer in self.scorers():
+            metric_info = self.custom_metric_map[scorer.name]
+            if metric_info.get("requires_association_id", False) and len(association_ids):
+                values = scorer.score_rows(predictions_df)
+                if not values:
+                    self.logger.debug(f"No {scorer} values found")
+                    continue
+
+                # assign back to the dataframe, so consumer has it
+                predictions_df[scorer.name] = values
+                metric_id = metric_info.get("id")
+                for association_id, value in zip(association_ids, values):
+                    metric_reports.append(
+                        self.custom_metric_individual_payload(metric_id, value, association_id)
+                    )
+                continue
+
+            value = scorer.score(predictions_df)
+            self.record_aggregate_value(scorer.name, value)
+
+        self.report_custom_metrics(metric_reports)
+        return predictions_df
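Putting the VDB changes together: `VDBPipeline.score` now chooses, per scorer, between row-level reporting (via `score_rows`, paired with user-supplied or auto-generated association ids and sent as individual payloads) and the previous aggregate path (via `score` and `record_aggregate_value`). Below is a rough, self-contained sketch of that dispatch under assumed names, with no DataRobot client involved.

    import uuid

    import pandas as pd

    # Toy predictions frame; column names are placeholders.
    predictions_df = pd.DataFrame({"citations": [["doc a", "doc b"], ["doc c"]]})
    predictions_df["association_id"] = [str(uuid.uuid4()) for _ in range(len(predictions_df))]

    def score_rows(df: pd.DataFrame) -> list[float]:  # per-row, like MetricScorer.score_rows
        return [float(len(cell)) for cell in df["citations"].values]

    def score(df: pd.DataFrame) -> float:  # aggregate, like MetricScorer.score
        return float(sum(len(cell) for cell in df["citations"].values))

    requires_association_id = True  # comes from custom_metric_map in the real pipeline
    metric_reports = []
    if requires_association_id:
        values = score_rows(predictions_df)
        predictions_df["Document Count"] = values  # handed back to the caller
        for association_id, value in zip(predictions_df["association_id"], values):
            metric_reports.append({"associationId": association_id, "value": value})
    else:
        aggregate = score(predictions_df)  # would go through record_aggregate_value

    print(metric_reports)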
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: datarobot-moderations
-Version: 11.1.22
+Version: 11.2.1
 Summary: DataRobot Monitoring and Moderation framework
 License: DataRobot Tool and Utility Agreement
 Author: DataRobot
@@ -19,10 +19,9 @@ Requires-Dist: deepeval (==2.7.9)
 Requires-Dist: langchain (>=0.1.12)
 Requires-Dist: langchain-nvidia-ai-endpoints (>=0.3.9)
 Requires-Dist: langchain-openai (>=0.1.7)
-Requires-Dist: llama-index (>=0.12.9)
+Requires-Dist: llama-index (>=0.12.49)
 Requires-Dist: llama-index-embeddings-azure-openai (>=0.1.6)
 Requires-Dist: llama-index-llms-bedrock-converse (>=0.1.6)
-Requires-Dist: llama-index-llms-fireworks (>=0.1.5)
 Requires-Dist: llama-index-llms-langchain (>=0.1.3)
 Requires-Dist: llama-index-llms-vertex (>=0.1.5)
 Requires-Dist: nemoguardrails (>=0.9.0)
@@ -2,7 +2,7 @@ datarobot_dome/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,58
 datarobot_dome/async_http_client.py,sha256=wkB4irwvnchNGzO1bk2C_HWM-GOSB3AUn5TXKl-X0ZI,9649
 datarobot_dome/chat_helper.py,sha256=BzvtUyZSZxzOqq-5a2wQKhHhr2kMlcP1MFrHaDAeD_o,9671
 datarobot_dome/constants.py,sha256=vM2_JkXbn4dkWARCqxNfLriSo0E05LDXVrwNktptpuc,10416
-datarobot_dome/drum_integration.py,sha256=DGtf2XfVb8uxNWOFyJwB6l7QObFLaDQFN13vYhA68Rs,42293
+datarobot_dome/drum_integration.py,sha256=HresblJwlCk_sRnWReWQWeZMg5rYzKTA2hjmy1Rcn6U,40553
 datarobot_dome/guard.py,sha256=1INYx17n9ToiB5bzI-jIReUUuqkK_ucxpOx4jQLts6g,33264
 datarobot_dome/guard_executor.py,sha256=AOI8MZeZETHMoFgBePe0wa2vE9d2975MYQnEDHLZL7s,35462
 datarobot_dome/guard_helpers.py,sha256=ajxm-w7MS7eN5DMMO-jbbzjcOYMZ-LvhO53n2NI5_Fk,16773
@@ -10,15 +10,15 @@ datarobot_dome/guards/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzI
 datarobot_dome/guards/guard_llm_mixin.py,sha256=VovlpNZjWIGamF4SSvLF5lzOFyApH5IoOiB_qtCmRg0,12216
 datarobot_dome/llm.py,sha256=L02OvTrflmD34-FrfXebfF-zzKTeuin7fpne1Cl5psg,5719
 datarobot_dome/metrics/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,583
-datarobot_dome/metrics/citation_metrics.py,sha256=q0hTMWuk6wy_jqk2UjFPON3kU94HN3W2vxr9giJ8O8E,3544
+datarobot_dome/metrics/citation_metrics.py,sha256=l2mnV1gz7nQeJ_yfaS4dcP3DFWf0p5QIBnKQ6shLnw4,4652
 datarobot_dome/metrics/factory.py,sha256=7caa8paI9LuFXDgguXdC4on28V7IwwIsKJT2Z-Aps8A,2187
-datarobot_dome/metrics/metric_scorer.py,sha256=mGxW3NNP93LpbpOiX3MeYyd0YEEjTPE8WVYMGS4SWoY,2516
+datarobot_dome/metrics/metric_scorer.py,sha256=uJ_IJRw7ZFHueg8xjsaXbt0ypO7JiydZ0WapCp96yng,2540
 datarobot_dome/pipeline/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,583
-datarobot_dome/pipeline/llm_pipeline.py,sha256=fOp_OJnQMDUJH-LKv12kEqli-EqfHjAiSTFqtxzMkhM,19942
-datarobot_dome/pipeline/pipeline.py,sha256=YrGR3uS7kY9dd30B97cMESBbVNsze25RSoIcjEMqCb0,16251
-datarobot_dome/pipeline/vdb_pipeline.py,sha256=WTOGn1qe_ZvEcdlvHgeXxl2xTqp7GjfL13c6S-FmAfM,5146
+datarobot_dome/pipeline/llm_pipeline.py,sha256=g7PAiLOMADr2DQFrtg2NrUj4u_tcvnoiJXrBR8xWsmY,18789
+datarobot_dome/pipeline/pipeline.py,sha256=7UmvrZtNxTGewpgM4cf2oThHPoJSarEU1Dyp7xEsASU,17401
+datarobot_dome/pipeline/vdb_pipeline.py,sha256=q3c_Z-hGUqhH6j6n8VpS3wZiBIkWgpRDsBnyJyZhiw4,9855
 datarobot_dome/runtime.py,sha256=FD8wXOweqoQVzbZMh-mucL66xT2kGxPsJUGAcJBgwxw,1468
 datarobot_dome/streaming.py,sha256=6nYvh6SoxPRLfO6GGdEoHsQuyLP9oX1lDMe8IeGo4lw,17801
-datarobot_moderations-11.1.22.dist-info/METADATA,sha256=tL9-6ZvQXyVLxDbx5EvMi4x3dlsxJJy_ussBOSoPPVQ,4794
-datarobot_moderations-11.1.22.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-datarobot_moderations-11.1.22.dist-info/RECORD,,
+datarobot_moderations-11.2.1.dist-info/METADATA,sha256=fEyM5I3z0qS9dT2ofZ3J8UAN_ybm2SBF_46mifEHIpA,4742
+datarobot_moderations-11.2.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+datarobot_moderations-11.2.1.dist-info/RECORD,,