datarobot-moderations 11.1.14__py3-none-any.whl → 11.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
datarobot_dome/constants.py CHANGED
@@ -69,6 +69,8 @@ LLM_CONTEXT_COLUMN_NAME = "_LLM_CONTEXT"
 PROMPT_TOKEN_COUNT_COLUMN_NAME_FROM_USAGE = "prompt_token_count_from_usage"
 RESPONSE_TOKEN_COUNT_COLUMN_NAME_FROM_USAGE = "response_token_count_from_usage"
 
+SPAN_PREFIX = "datarobot.guard"
+
 
 class TargetType(str, Enum):
     """Target types that may be handed to moderations from DRUM -- casing must align."""
datarobot_dome/drum_integration.py CHANGED
@@ -26,6 +26,7 @@ from openai.types.chat import ChatCompletionChunk
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion import Choice
 from openai.types.chat.chat_completion_message import ChatCompletionMessage
+from opentelemetry import trace
 
 from datarobot_dome.chat_helper import add_citations_to_df
 from datarobot_dome.chat_helper import add_token_count_columns_to_df
@@ -57,6 +58,9 @@ from datarobot_dome.pipeline.vdb_pipeline import VDBPipeline
 from datarobot_dome.streaming import ModerationIterator
 from datarobot_dome.streaming import StreamingContextBuilder
 
+tracer = trace.get_tracer(__name__)
+
+
 _logger = logging.getLogger("drum_integration")
 
 
@@ -508,7 +512,7 @@ def build_predictions_df_from_completion(data, pipeline, chat_completion):
         if pipeline_interactions:
             predictions_df[AGENTIC_PIPELINE_INTERACTIONS_ATTR] = pipeline_interactions
         else:
-            predictions_df[AGENTIC_PIPELINE_INTERACTIONS_ATTR] = [np.nan] * len(predictions_df)
+            predictions_df[AGENTIC_PIPELINE_INTERACTIONS_ATTR] = [None] * len(predictions_df)
 
         source_object = chat_completion
     elif isinstance(chat_completion, Iterable):
@@ -661,6 +665,37 @@ def _is_llm_requesting_user_tool_call(completion):
     return False, completion
 
 
+def __get_otel_values(guards_list, stage, result_df):
+    guard_values = {}
+    for guard in guards_list:
+        if not guard.has_average_score_custom_metric():
+            continue
+        guard_metric_column_name = guard.get_metric_column_name(stage)
+        if guard_metric_column_name not in result_df.columns:
+            _logger.warning(f"Missing column: {guard_metric_column_name} in result_df")
+            continue
+        guard_values[guard.get_span_column_name(stage)] = result_df[
+            guard_metric_column_name
+        ].tolist()[0]
+    return guard_values
+
+
+def report_otel_evaluation_set_metric(pipeline, result_df):
+    current_span = trace.get_current_span()
+    if not current_span:
+        _logger.warning("No currently active span found to report evaluation set metric")
+        return
+
+    prompt_values = __get_otel_values(pipeline.get_prescore_guards(), GuardStage.PROMPT, result_df)
+    response_values = __get_otel_values(
+        pipeline.get_postscore_guards(), GuardStage.RESPONSE, result_df
+    )
+
+    final_value = {"prompt_guards": prompt_values, "response_guards": response_values}
+
+    current_span.set_attribute("datarobot.moderation.evaluation", json.dumps(final_value))
+
+
 def guard_chat_wrapper(
     completion_create_params, model, pipeline, drum_chat_fn, association_id=None, **kwargs
 ):
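The new report_otel_evaluation_set_metric helper above serializes each guard's average score into a single JSON-encoded span attribute, keyed "datarobot.moderation.evaluation" and grouped by prompt and response guards. A minimal sketch of how a trace consumer might decode that attribute; the guard names and scores below are illustrative, not taken from the package:

    import json

    # Hypothetical attribute value, shaped like the final_value dict in the diff
    attribute_value = (
        '{"prompt_guards": {"toxicity": 0.12}, "response_guards": {"faithfulness": 0.9}}'
    )

    evaluation = json.loads(attribute_value)  # back to per-stage guard scores
    print(evaluation["prompt_guards"])    # {'toxicity': 0.12}
    print(evaluation["response_guards"])  # {'faithfulness': 0.9}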
@@ -723,6 +758,7 @@ def guard_chat_wrapper(
         completion = _set_moderation_attribute_to_completion(
             pipeline, chat_completion, result_df, association_id=association_id
         )
+        report_otel_evaluation_set_metric(pipeline, result_df)
         return completion
 
     replaced_prompt_column_name = f"replaced_{prompt_column_name}"
@@ -802,6 +838,7 @@ def guard_chat_wrapper(
     ) / result_df.shape[0]
 
     response_message, finish_reason = get_response_message_and_finish_reason(pipeline, postscore_df)
+    report_otel_evaluation_set_metric(pipeline, result_df)
 
     final_completion = build_non_streaming_chat_completion(
         response_message, finish_reason, extra_attributes
datarobot_dome/guard.py CHANGED
@@ -35,6 +35,7 @@ from datarobot_dome.constants import FAITHFULLNESS_COLUMN_NAME
 from datarobot_dome.constants import NEMO_GUARD_COLUMN_NAME
 from datarobot_dome.constants import NEMO_GUARDRAILS_DIR
 from datarobot_dome.constants import ROUGE_1_COLUMN_NAME
+from datarobot_dome.constants import SPAN_PREFIX
 from datarobot_dome.constants import TASK_ADHERENCE_SCORE_COLUMN_NAME
 from datarobot_dome.constants import TOKEN_COUNT_COLUMN_NAME
 from datarobot_dome.constants import AwsModel
@@ -366,6 +367,21 @@ class Guard(ABC):
     def get_comparand(self):
         return self.intervention.threshold
 
+    def get_enforced_span_attribute_name(self, stage):
+        intervention_action = self.get_intervention_action()
+        if intervention_action in [GuardAction.BLOCK, GuardAction.REPORT]:
+            return f"{SPAN_PREFIX}.{stage.lower()}.{intervention_action}ed"
+        elif intervention_action == GuardAction.REPLACE:
+            return f"{SPAN_PREFIX}.{stage.lower()}.replaced"
+        else:
+            raise NotImplementedError
+
+    def get_span_column_name(self, _):
+        raise NotImplementedError
+
+    def get_span_attribute_name(self, _):
+        raise NotImplementedError
+
 
 class GuardModelInfo:
     def __init__(self, model_config: dict):
@@ -434,6 +450,15 @@ class ModelGuard(Guard):
             raise NotImplementedError("Missing model_info for model guard")
         return self.get_stage_str(stage) + "_" + self._model_info.target_name
 
+    def get_span_column_name(self, _):
+        if self.model_info is None:
+            raise NotImplementedError("Missing model_info for model guard")
+        # Typically 0th index is the target name
+        return self._model_info.target_name.split("_")[0]
+
+    def get_span_attribute_name(self, stage):
+        return f"{SPAN_PREFIX}.{stage.lower()}.{self.get_span_column_name(stage)}"
+
     def has_average_score_custom_metric(self) -> bool:
         """A couple ModelGuard types do not have an average score metric"""
         return self.model_info.target_type not in [
@@ -566,6 +591,19 @@ class OOTBGuard(Guard):
         else:
             raise NotImplementedError(f"No metric column name defined for {self._ootb_type} guard")
 
+    def get_span_column_name(self, _):
+        if self._ootb_type == OOTBType.TOKEN_COUNT:
+            return TOKEN_COUNT_COLUMN_NAME
+        elif self._ootb_type == OOTBType.ROUGE_1:
+            return ROUGE_1_COLUMN_NAME
+        elif self._ootb_type == OOTBType.CUSTOM_METRIC:
+            return self.name
+        else:
+            raise NotImplementedError(f"No span attribute name defined for {self._ootb_type} guard")
+
+    def get_span_attribute_name(self, stage):
+        return f"{SPAN_PREFIX}.{stage.lower()}.{self.get_span_column_name(stage)}"
+
 
 class OOTBCostMetric(OOTBGuard):
     def __init__(self, config, stage):
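The guard classes above compose their OpenTelemetry attribute keys from SPAN_PREFIX, the stage, and a per-guard span column name. A small sketch of that naming convention, using illustrative stage strings and column names rather than the package's actual GuardStage values and column constants:

    SPAN_PREFIX = "datarobot.guard"  # added in constants.py above

    def span_attribute_name(stage: str, span_column_name: str) -> str:
        # Mirrors the f-string used by get_span_attribute_name in the diff
        return f"{SPAN_PREFIX}.{stage.lower()}.{span_column_name}"

    # Illustrative values only; real stages and columns come from GuardStage and guard config
    print(span_attribute_name("prompt", "token_count"))  # datarobot.guard.prompt.token_count
    print(span_attribute_name("response", "rouge_1"))    # datarobot.guard.response.rouge_1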
@@ -603,6 +641,12 @@ class OOTBCostMetric(OOTBGuard):
             ),
         }
 
+    def get_span_column_name(self, _):
+        return f"{COST_COLUMN_NAME}.{self.currency.lower()}"
+
+    def get_span_attribute_name(self, _):
+        return f"{SPAN_PREFIX}.{self._stage.lower()}.{self.get_span_column_name(_)}"
+
 
 class FaithfulnessGuard(OOTBGuard, GuardLLMMixin):
     def __init__(self, config: dict, stage=None):
@@ -624,6 +668,12 @@ class FaithfulnessGuard(OOTBGuard, GuardLLMMixin):
     def faithfulness_evaluator(self):
         return self._evaluator
 
+    def get_span_column_name(self, _):
+        return FAITHFULLNESS_COLUMN_NAME
+
+    def get_span_attribute_name(self, _):
+        return f"{SPAN_PREFIX}.{self._stage.lower()}.{self.get_span_column_name(_)}"
+
 
 class AgentGoalAccuracyGuard(OOTBGuard, GuardLLMMixin):
     def __init__(self, config: dict, stage=None):
@@ -645,6 +695,12 @@ class AgentGoalAccuracyGuard(OOTBGuard, GuardLLMMixin):
     def accuracy_scorer(self):
         return self.scorer
 
+    def get_span_column_name(self, _):
+        return AGENT_GOAL_ACCURACY_COLUMN_NAME
+
+    def get_span_attribute_name(self, _):
+        return f"{SPAN_PREFIX}.{self._stage.lower()}.{self.get_span_column_name(_)}"
+
 
 class TaskAdherenceGuard(OOTBGuard, GuardLLMMixin):
     def __init__(self, config: dict, stage=None):
@@ -666,6 +722,12 @@ class TaskAdherenceGuard(OOTBGuard, GuardLLMMixin):
     def task_adherence_scorer(self):
         return self.scorer
 
+    def get_span_column_name(self, _):
+        return TASK_ADHERENCE_SCORE_COLUMN_NAME
+
+    def get_span_attribute_name(self, _):
+        return f"{SPAN_PREFIX}.{self._stage.lower()}.{self.get_span_column_name(_)}"
+
 
 class GuardFactory:
     @classmethod
datarobot_dome/guard_executor.py CHANGED
@@ -34,7 +34,6 @@ from datarobot_dome.constants import ModerationEventTypes
 from datarobot_dome.constants import OOTBType
 from datarobot_dome.guard import AgentGoalAccuracyGuard
 from datarobot_dome.guard import FaithfulnessGuard
-from datarobot_dome.guard import Guard
 from datarobot_dome.guard import ModelGuard
 from datarobot_dome.guard import NeMoGuard
 from datarobot_dome.guard import OOTBCostMetric
@@ -113,6 +112,25 @@ class AsyncGuardExecutor:
             span.set_attribute("datarobot.moderation.guard.latency", latency)
             if guard.has_latency_custom_metric():
                 self.pipeline.report_guard_latency(guard, latency)
+            if guard.has_average_score_custom_metric():
+                metric_column_name = guard.get_metric_column_name(stage)
+                if metric_column_name in df.columns:
+                    span.set_attribute(
+                        guard.get_span_attribute_name(stage),
+                        df[metric_column_name].tolist()[0],
+                    )
+            if guard.get_intervention_action():
+                (
+                    enforced_column_name,
+                    _,
+                    _,
+                ) = self._get_enforced_and_action_column_names(
+                    guard.get_intervention_action(), self.pipeline.get_input_column(stage)
+                )
+                span.set_attribute(
+                    guard.get_enforced_span_attribute_name(stage),
+                    df[enforced_column_name].tolist()[0],
+                )
 
         return df, latency
 
@@ -204,7 +222,7 @@ class AsyncGuardExecutor:
             # and "Response_toxicity_toxic_PREDICTION", if toxicity is configured for both
             # prompts and responses
             copy_df.rename(
-                columns={metric_column: Guard.get_stage_str(stage) + "_" + metric_column},
+                columns={metric_column: guard.get_metric_column_name(stage)},
                 inplace=True,
             )
         except Exception as ex:
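A minimal, self-contained sketch of how the per-guard span attributes written by AsyncGuardExecutor could be observed in a test, using OpenTelemetry's in-memory exporter; the span name and the attribute key/value are illustrative, not taken from the package:

    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
    from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

    # Collect finished spans in memory instead of exporting them
    exporter = InMemorySpanExporter()
    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(exporter))
    trace.set_tracer_provider(provider)

    tracer = trace.get_tracer(__name__)
    with tracer.start_as_current_span("guard-execution") as span:
        # Stands in for the span.set_attribute(...) calls made in the hunk above
        span.set_attribute("datarobot.guard.prompt.toxicity", 0.12)

    for finished_span in exporter.get_finished_spans():
        print(dict(finished_span.attributes))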
datarobot_dome/guard_helpers.py CHANGED
@@ -400,7 +400,7 @@ def calculate_agent_goal_accuracy(
     interactions: str,
     response: str,
 ):
-    if interactions is None:
+    if interactions is None or interactions == "":
         # If interactions are missing - we use prompt and response to gauge the
         # goal accuracy
         sample = MultiTurnSample(
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: datarobot-moderations
-Version: 11.1.14
+Version: 11.1.15
 Summary: DataRobot Monitoring and Moderation framework
 License: DataRobot Tool and Utility Agreement
 Author: DataRobot
@@ -1,11 +1,11 @@
 datarobot_dome/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,583
 datarobot_dome/async_http_client.py,sha256=wkB4irwvnchNGzO1bk2C_HWM-GOSB3AUn5TXKl-X0ZI,9649
 datarobot_dome/chat_helper.py,sha256=BzvtUyZSZxzOqq-5a2wQKhHhr2kMlcP1MFrHaDAeD_o,9671
-datarobot_dome/constants.py,sha256=vDU7En5Nd1bbfRIr02ReFtzZDqEg4RGCT7gdw3P0LO0,9007
-datarobot_dome/drum_integration.py,sha256=gRn2sQCmRs0RH0tVOdHX6amxGEX1R6WqEtOF2zdBtC4,40693
-datarobot_dome/guard.py,sha256=afcJSSo509aHHvM6nm-QTKzQjuWE7VzgpihenDaAf3w,29921
-datarobot_dome/guard_executor.py,sha256=9SuefqQRpJ_4fFm62YOPixg0Fi9z-mzR5eMPeknBT2Y,34642
-datarobot_dome/guard_helpers.py,sha256=VkNaoMAWAEggodpl7KmWZTM6H9H6e9Ny3Rl2HBXZnfM,16353
+datarobot_dome/constants.py,sha256=mnSa8rUAha4XlsS2lwPmFCkH2RzfSL_MMkErsWHqIbA,9040
+datarobot_dome/drum_integration.py,sha256=nULpLYVMiS5vihfNUyuq-nvZpgXrQibQbVu2UMAscu8,42102
+datarobot_dome/guard.py,sha256=7T0a1gsWqVmVvEf4SLkVBi8lIRYl8PeMB7TnQGszWtc,32371
+datarobot_dome/guard_executor.py,sha256=AOI8MZeZETHMoFgBePe0wa2vE9d2975MYQnEDHLZL7s,35462
+datarobot_dome/guard_helpers.py,sha256=YHhSUSuvxAgDdWPXiwYiHtrl-6ZlObE9n6CjYPQNSuA,16375
 datarobot_dome/guards/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,583
 datarobot_dome/guards/guard_llm_mixin.py,sha256=ON-zuVL3xhQmXv0rFkalWrW_Q67Wwya2IQerHO8WkKU,10694
 datarobot_dome/llm.py,sha256=L02OvTrflmD34-FrfXebfF-zzKTeuin7fpne1Cl5psg,5719
@@ -18,6 +18,6 @@ datarobot_dome/pipeline/llm_pipeline.py,sha256=fOp_OJnQMDUJH-LKv12kEqli-EqfHjAiS
 datarobot_dome/pipeline/pipeline.py,sha256=_pZ_4K2LMnfYCYj_ur9EwJzo3T-pbO6lFYz1O-_3uQ4,16491
 datarobot_dome/pipeline/vdb_pipeline.py,sha256=WTOGn1qe_ZvEcdlvHgeXxl2xTqp7GjfL13c6S-FmAfM,5146
 datarobot_dome/streaming.py,sha256=6nYvh6SoxPRLfO6GGdEoHsQuyLP9oX1lDMe8IeGo4lw,17801
-datarobot_moderations-11.1.14.dist-info/METADATA,sha256=39J7-G34lxk7ULqxroi3K0RekSNmaiCnPW5OvvMzWDk,4827
-datarobot_moderations-11.1.14.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-datarobot_moderations-11.1.14.dist-info/RECORD,,
+datarobot_moderations-11.1.15.dist-info/METADATA,sha256=zHt26VnmHpn-0cL-egKPqdcTvKPTittBNtVHLVylbHo,4827
+datarobot_moderations-11.1.15.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+datarobot_moderations-11.1.15.dist-info/RECORD,,