datarobot-moderations 11.1.14__py3-none-any.whl → 11.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datarobot_dome/constants.py +2 -0
- datarobot_dome/drum_integration.py +38 -1
- datarobot_dome/guard.py +62 -0
- datarobot_dome/guard_executor.py +20 -2
- datarobot_dome/guard_helpers.py +1 -1
- {datarobot_moderations-11.1.14.dist-info → datarobot_moderations-11.1.15.dist-info}/METADATA +1 -1
- {datarobot_moderations-11.1.14.dist-info → datarobot_moderations-11.1.15.dist-info}/RECORD +8 -8
- {datarobot_moderations-11.1.14.dist-info → datarobot_moderations-11.1.15.dist-info}/WHEEL +0 -0
datarobot_dome/constants.py
CHANGED
|
@@ -69,6 +69,8 @@ LLM_CONTEXT_COLUMN_NAME = "_LLM_CONTEXT"
|
|
|
69
69
|
PROMPT_TOKEN_COUNT_COLUMN_NAME_FROM_USAGE = "prompt_token_count_from_usage"
|
|
70
70
|
RESPONSE_TOKEN_COUNT_COLUMN_NAME_FROM_USAGE = "response_token_count_from_usage"
|
|
71
71
|
|
|
72
|
+
SPAN_PREFIX = "datarobot.guard"
|
|
73
|
+
|
|
72
74
|
|
|
73
75
|
class TargetType(str, Enum):
|
|
74
76
|
"""Target types that may be handed to moderations from DRUM -- casing must align."""
|
|
@@ -26,6 +26,7 @@ from openai.types.chat import ChatCompletionChunk
|
|
|
26
26
|
from openai.types.chat.chat_completion import ChatCompletion
|
|
27
27
|
from openai.types.chat.chat_completion import Choice
|
|
28
28
|
from openai.types.chat.chat_completion_message import ChatCompletionMessage
|
|
29
|
+
from opentelemetry import trace
|
|
29
30
|
|
|
30
31
|
from datarobot_dome.chat_helper import add_citations_to_df
|
|
31
32
|
from datarobot_dome.chat_helper import add_token_count_columns_to_df
|
|
@@ -57,6 +58,9 @@ from datarobot_dome.pipeline.vdb_pipeline import VDBPipeline
|
|
|
57
58
|
from datarobot_dome.streaming import ModerationIterator
|
|
58
59
|
from datarobot_dome.streaming import StreamingContextBuilder
|
|
59
60
|
|
|
61
|
+
tracer = trace.get_tracer(__name__)
|
|
62
|
+
|
|
63
|
+
|
|
60
64
|
_logger = logging.getLogger("drum_integration")
|
|
61
65
|
|
|
62
66
|
|
|
@@ -508,7 +512,7 @@ def build_predictions_df_from_completion(data, pipeline, chat_completion):
|
|
|
508
512
|
if pipeline_interactions:
|
|
509
513
|
predictions_df[AGENTIC_PIPELINE_INTERACTIONS_ATTR] = pipeline_interactions
|
|
510
514
|
else:
|
|
511
|
-
predictions_df[AGENTIC_PIPELINE_INTERACTIONS_ATTR] = [
|
|
515
|
+
predictions_df[AGENTIC_PIPELINE_INTERACTIONS_ATTR] = [None] * len(predictions_df)
|
|
512
516
|
|
|
513
517
|
source_object = chat_completion
|
|
514
518
|
elif isinstance(chat_completion, Iterable):
|
|
@@ -661,6 +665,37 @@ def _is_llm_requesting_user_tool_call(completion):
|
|
|
661
665
|
return False, completion
|
|
662
666
|
|
|
663
667
|
|
|
668
|
+
def __get_otel_values(guards_list, stage, result_df):
|
|
669
|
+
guard_values = {}
|
|
670
|
+
for guard in guards_list:
|
|
671
|
+
if not guard.has_average_score_custom_metric():
|
|
672
|
+
continue
|
|
673
|
+
guard_metric_column_name = guard.get_metric_column_name(stage)
|
|
674
|
+
if guard_metric_column_name not in result_df.columns:
|
|
675
|
+
_logger.warning(f"Missing column: {guard_metric_column_name} in result_df")
|
|
676
|
+
continue
|
|
677
|
+
guard_values[guard.get_span_column_name(stage)] = result_df[
|
|
678
|
+
guard_metric_column_name
|
|
679
|
+
].tolist()[0]
|
|
680
|
+
return guard_values
|
|
681
|
+
|
|
682
|
+
|
|
683
|
+
def report_otel_evaluation_set_metric(pipeline, result_df):
|
|
684
|
+
current_span = trace.get_current_span()
|
|
685
|
+
if not current_span:
|
|
686
|
+
_logger.warning("No currently active span found to report evaluation set metric")
|
|
687
|
+
return
|
|
688
|
+
|
|
689
|
+
prompt_values = __get_otel_values(pipeline.get_prescore_guards(), GuardStage.PROMPT, result_df)
|
|
690
|
+
response_values = __get_otel_values(
|
|
691
|
+
pipeline.get_postscore_guards(), GuardStage.RESPONSE, result_df
|
|
692
|
+
)
|
|
693
|
+
|
|
694
|
+
final_value = {"prompt_guards": prompt_values, "response_guards": response_values}
|
|
695
|
+
|
|
696
|
+
current_span.set_attribute("datarobot.moderation.evaluation", json.dumps(final_value))
|
|
697
|
+
|
|
698
|
+
|
|
664
699
|
def guard_chat_wrapper(
|
|
665
700
|
completion_create_params, model, pipeline, drum_chat_fn, association_id=None, **kwargs
|
|
666
701
|
):
|
|
@@ -723,6 +758,7 @@ def guard_chat_wrapper(
|
|
|
723
758
|
completion = _set_moderation_attribute_to_completion(
|
|
724
759
|
pipeline, chat_completion, result_df, association_id=association_id
|
|
725
760
|
)
|
|
761
|
+
report_otel_evaluation_set_metric(pipeline, result_df)
|
|
726
762
|
return completion
|
|
727
763
|
|
|
728
764
|
replaced_prompt_column_name = f"replaced_{prompt_column_name}"
|
|
@@ -802,6 +838,7 @@ def guard_chat_wrapper(
|
|
|
802
838
|
) / result_df.shape[0]
|
|
803
839
|
|
|
804
840
|
response_message, finish_reason = get_response_message_and_finish_reason(pipeline, postscore_df)
|
|
841
|
+
report_otel_evaluation_set_metric(pipeline, result_df)
|
|
805
842
|
|
|
806
843
|
final_completion = build_non_streaming_chat_completion(
|
|
807
844
|
response_message, finish_reason, extra_attributes
|
datarobot_dome/guard.py
CHANGED
|
@@ -35,6 +35,7 @@ from datarobot_dome.constants import FAITHFULLNESS_COLUMN_NAME
|
|
|
35
35
|
from datarobot_dome.constants import NEMO_GUARD_COLUMN_NAME
|
|
36
36
|
from datarobot_dome.constants import NEMO_GUARDRAILS_DIR
|
|
37
37
|
from datarobot_dome.constants import ROUGE_1_COLUMN_NAME
|
|
38
|
+
from datarobot_dome.constants import SPAN_PREFIX
|
|
38
39
|
from datarobot_dome.constants import TASK_ADHERENCE_SCORE_COLUMN_NAME
|
|
39
40
|
from datarobot_dome.constants import TOKEN_COUNT_COLUMN_NAME
|
|
40
41
|
from datarobot_dome.constants import AwsModel
|
|
@@ -366,6 +367,21 @@ class Guard(ABC):
|
|
|
366
367
|
def get_comparand(self):
|
|
367
368
|
return self.intervention.threshold
|
|
368
369
|
|
|
370
|
+
def get_enforced_span_attribute_name(self, stage):
|
|
371
|
+
intervention_action = self.get_intervention_action()
|
|
372
|
+
if intervention_action in [GuardAction.BLOCK, GuardAction.REPORT]:
|
|
373
|
+
return f"{SPAN_PREFIX}.{stage.lower()}.{intervention_action}ed"
|
|
374
|
+
elif intervention_action == GuardAction.REPLACE:
|
|
375
|
+
return f"{SPAN_PREFIX}.{stage.lower()}.replaced"
|
|
376
|
+
else:
|
|
377
|
+
raise NotImplementedError
|
|
378
|
+
|
|
379
|
+
def get_span_column_name(self, _):
|
|
380
|
+
raise NotImplementedError
|
|
381
|
+
|
|
382
|
+
def get_span_attribute_name(self, _):
|
|
383
|
+
raise NotImplementedError
|
|
384
|
+
|
|
369
385
|
|
|
370
386
|
class GuardModelInfo:
|
|
371
387
|
def __init__(self, model_config: dict):
|
|
@@ -434,6 +450,15 @@ class ModelGuard(Guard):
|
|
|
434
450
|
raise NotImplementedError("Missing model_info for model guard")
|
|
435
451
|
return self.get_stage_str(stage) + "_" + self._model_info.target_name
|
|
436
452
|
|
|
453
|
+
def get_span_column_name(self, _):
|
|
454
|
+
if self.model_info is None:
|
|
455
|
+
raise NotImplementedError("Missing model_info for model guard")
|
|
456
|
+
# Typically 0th index is the target name
|
|
457
|
+
return self._model_info.target_name.split("_")[0]
|
|
458
|
+
|
|
459
|
+
def get_span_attribute_name(self, stage):
|
|
460
|
+
return f"{SPAN_PREFIX}.{stage.lower()}.{self.get_span_column_name(stage)}"
|
|
461
|
+
|
|
437
462
|
def has_average_score_custom_metric(self) -> bool:
|
|
438
463
|
"""A couple ModelGuard types do not have an average score metric"""
|
|
439
464
|
return self.model_info.target_type not in [
|
|
@@ -566,6 +591,19 @@ class OOTBGuard(Guard):
|
|
|
566
591
|
else:
|
|
567
592
|
raise NotImplementedError(f"No metric column name defined for {self._ootb_type} guard")
|
|
568
593
|
|
|
594
|
+
def get_span_column_name(self, _):
|
|
595
|
+
if self._ootb_type == OOTBType.TOKEN_COUNT:
|
|
596
|
+
return TOKEN_COUNT_COLUMN_NAME
|
|
597
|
+
elif self._ootb_type == OOTBType.ROUGE_1:
|
|
598
|
+
return ROUGE_1_COLUMN_NAME
|
|
599
|
+
elif self._ootb_type == OOTBType.CUSTOM_METRIC:
|
|
600
|
+
return self.name
|
|
601
|
+
else:
|
|
602
|
+
raise NotImplementedError(f"No span attribute name defined for {self._ootb_type} guard")
|
|
603
|
+
|
|
604
|
+
def get_span_attribute_name(self, stage):
|
|
605
|
+
return f"{SPAN_PREFIX}.{stage.lower()}.{self.get_span_column_name(stage)}"
|
|
606
|
+
|
|
569
607
|
|
|
570
608
|
class OOTBCostMetric(OOTBGuard):
|
|
571
609
|
def __init__(self, config, stage):
|
|
@@ -603,6 +641,12 @@ class OOTBCostMetric(OOTBGuard):
|
|
|
603
641
|
),
|
|
604
642
|
}
|
|
605
643
|
|
|
644
|
+
def get_span_column_name(self, _):
|
|
645
|
+
return f"{COST_COLUMN_NAME}.{self.currency.lower()}"
|
|
646
|
+
|
|
647
|
+
def get_span_attribute_name(self, _):
|
|
648
|
+
return f"{SPAN_PREFIX}.{self._stage.lower()}.{self.get_span_column_name(_)}"
|
|
649
|
+
|
|
606
650
|
|
|
607
651
|
class FaithfulnessGuard(OOTBGuard, GuardLLMMixin):
|
|
608
652
|
def __init__(self, config: dict, stage=None):
|
|
@@ -624,6 +668,12 @@ class FaithfulnessGuard(OOTBGuard, GuardLLMMixin):
|
|
|
624
668
|
def faithfulness_evaluator(self):
|
|
625
669
|
return self._evaluator
|
|
626
670
|
|
|
671
|
+
def get_span_column_name(self, _):
|
|
672
|
+
return FAITHFULLNESS_COLUMN_NAME
|
|
673
|
+
|
|
674
|
+
def get_span_attribute_name(self, _):
|
|
675
|
+
return f"{SPAN_PREFIX}.{self._stage.lower()}.{self.get_span_column_name(_)}"
|
|
676
|
+
|
|
627
677
|
|
|
628
678
|
class AgentGoalAccuracyGuard(OOTBGuard, GuardLLMMixin):
|
|
629
679
|
def __init__(self, config: dict, stage=None):
|
|
@@ -645,6 +695,12 @@ class AgentGoalAccuracyGuard(OOTBGuard, GuardLLMMixin):
|
|
|
645
695
|
def accuracy_scorer(self):
|
|
646
696
|
return self.scorer
|
|
647
697
|
|
|
698
|
+
def get_span_column_name(self, _):
|
|
699
|
+
return AGENT_GOAL_ACCURACY_COLUMN_NAME
|
|
700
|
+
|
|
701
|
+
def get_span_attribute_name(self, _):
|
|
702
|
+
return f"{SPAN_PREFIX}.{self._stage.lower()}.{self.get_span_column_name(_)}"
|
|
703
|
+
|
|
648
704
|
|
|
649
705
|
class TaskAdherenceGuard(OOTBGuard, GuardLLMMixin):
|
|
650
706
|
def __init__(self, config: dict, stage=None):
|
|
@@ -666,6 +722,12 @@ class TaskAdherenceGuard(OOTBGuard, GuardLLMMixin):
|
|
|
666
722
|
def task_adherence_scorer(self):
|
|
667
723
|
return self.scorer
|
|
668
724
|
|
|
725
|
+
def get_span_column_name(self, _):
|
|
726
|
+
return TASK_ADHERENCE_SCORE_COLUMN_NAME
|
|
727
|
+
|
|
728
|
+
def get_span_attribute_name(self, _):
|
|
729
|
+
return f"{SPAN_PREFIX}.{self._stage.lower()}.{self.get_span_column_name(_)}"
|
|
730
|
+
|
|
669
731
|
|
|
670
732
|
class GuardFactory:
|
|
671
733
|
@classmethod
|
datarobot_dome/guard_executor.py
CHANGED
|
@@ -34,7 +34,6 @@ from datarobot_dome.constants import ModerationEventTypes
|
|
|
34
34
|
from datarobot_dome.constants import OOTBType
|
|
35
35
|
from datarobot_dome.guard import AgentGoalAccuracyGuard
|
|
36
36
|
from datarobot_dome.guard import FaithfulnessGuard
|
|
37
|
-
from datarobot_dome.guard import Guard
|
|
38
37
|
from datarobot_dome.guard import ModelGuard
|
|
39
38
|
from datarobot_dome.guard import NeMoGuard
|
|
40
39
|
from datarobot_dome.guard import OOTBCostMetric
|
|
@@ -113,6 +112,25 @@ class AsyncGuardExecutor:
|
|
|
113
112
|
span.set_attribute("datarobot.moderation.guard.latency", latency)
|
|
114
113
|
if guard.has_latency_custom_metric():
|
|
115
114
|
self.pipeline.report_guard_latency(guard, latency)
|
|
115
|
+
if guard.has_average_score_custom_metric():
|
|
116
|
+
metric_column_name = guard.get_metric_column_name(stage)
|
|
117
|
+
if metric_column_name in df.columns:
|
|
118
|
+
span.set_attribute(
|
|
119
|
+
guard.get_span_attribute_name(stage),
|
|
120
|
+
df[metric_column_name].tolist()[0],
|
|
121
|
+
)
|
|
122
|
+
if guard.get_intervention_action():
|
|
123
|
+
(
|
|
124
|
+
enforced_column_name,
|
|
125
|
+
_,
|
|
126
|
+
_,
|
|
127
|
+
) = self._get_enforced_and_action_column_names(
|
|
128
|
+
guard.get_intervention_action(), self.pipeline.get_input_column(stage)
|
|
129
|
+
)
|
|
130
|
+
span.set_attribute(
|
|
131
|
+
guard.get_enforced_span_attribute_name(stage),
|
|
132
|
+
df[enforced_column_name].tolist()[0],
|
|
133
|
+
)
|
|
116
134
|
|
|
117
135
|
return df, latency
|
|
118
136
|
|
|
@@ -204,7 +222,7 @@ class AsyncGuardExecutor:
|
|
|
204
222
|
# and "Response_toxicity_toxic_PREDICTION", if toxicity is configured for both
|
|
205
223
|
# prompts and responses
|
|
206
224
|
copy_df.rename(
|
|
207
|
-
columns={metric_column:
|
|
225
|
+
columns={metric_column: guard.get_metric_column_name(stage)},
|
|
208
226
|
inplace=True,
|
|
209
227
|
)
|
|
210
228
|
except Exception as ex:
|
datarobot_dome/guard_helpers.py
CHANGED
|
@@ -400,7 +400,7 @@ def calculate_agent_goal_accuracy(
|
|
|
400
400
|
interactions: str,
|
|
401
401
|
response: str,
|
|
402
402
|
):
|
|
403
|
-
if interactions is None:
|
|
403
|
+
if interactions is None or interactions == "":
|
|
404
404
|
# If interactions are missing - we use prompt and response to gauge the
|
|
405
405
|
# goal accuracy
|
|
406
406
|
sample = MultiTurnSample(
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
datarobot_dome/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,583
|
|
2
2
|
datarobot_dome/async_http_client.py,sha256=wkB4irwvnchNGzO1bk2C_HWM-GOSB3AUn5TXKl-X0ZI,9649
|
|
3
3
|
datarobot_dome/chat_helper.py,sha256=BzvtUyZSZxzOqq-5a2wQKhHhr2kMlcP1MFrHaDAeD_o,9671
|
|
4
|
-
datarobot_dome/constants.py,sha256=
|
|
5
|
-
datarobot_dome/drum_integration.py,sha256=
|
|
6
|
-
datarobot_dome/guard.py,sha256=
|
|
7
|
-
datarobot_dome/guard_executor.py,sha256=
|
|
8
|
-
datarobot_dome/guard_helpers.py,sha256=
|
|
4
|
+
datarobot_dome/constants.py,sha256=mnSa8rUAha4XlsS2lwPmFCkH2RzfSL_MMkErsWHqIbA,9040
|
|
5
|
+
datarobot_dome/drum_integration.py,sha256=nULpLYVMiS5vihfNUyuq-nvZpgXrQibQbVu2UMAscu8,42102
|
|
6
|
+
datarobot_dome/guard.py,sha256=7T0a1gsWqVmVvEf4SLkVBi8lIRYl8PeMB7TnQGszWtc,32371
|
|
7
|
+
datarobot_dome/guard_executor.py,sha256=AOI8MZeZETHMoFgBePe0wa2vE9d2975MYQnEDHLZL7s,35462
|
|
8
|
+
datarobot_dome/guard_helpers.py,sha256=YHhSUSuvxAgDdWPXiwYiHtrl-6ZlObE9n6CjYPQNSuA,16375
|
|
9
9
|
datarobot_dome/guards/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,583
|
|
10
10
|
datarobot_dome/guards/guard_llm_mixin.py,sha256=ON-zuVL3xhQmXv0rFkalWrW_Q67Wwya2IQerHO8WkKU,10694
|
|
11
11
|
datarobot_dome/llm.py,sha256=L02OvTrflmD34-FrfXebfF-zzKTeuin7fpne1Cl5psg,5719
|
|
@@ -18,6 +18,6 @@ datarobot_dome/pipeline/llm_pipeline.py,sha256=fOp_OJnQMDUJH-LKv12kEqli-EqfHjAiS
|
|
|
18
18
|
datarobot_dome/pipeline/pipeline.py,sha256=_pZ_4K2LMnfYCYj_ur9EwJzo3T-pbO6lFYz1O-_3uQ4,16491
|
|
19
19
|
datarobot_dome/pipeline/vdb_pipeline.py,sha256=WTOGn1qe_ZvEcdlvHgeXxl2xTqp7GjfL13c6S-FmAfM,5146
|
|
20
20
|
datarobot_dome/streaming.py,sha256=6nYvh6SoxPRLfO6GGdEoHsQuyLP9oX1lDMe8IeGo4lw,17801
|
|
21
|
-
datarobot_moderations-11.1.
|
|
22
|
-
datarobot_moderations-11.1.
|
|
23
|
-
datarobot_moderations-11.1.
|
|
21
|
+
datarobot_moderations-11.1.15.dist-info/METADATA,sha256=zHt26VnmHpn-0cL-egKPqdcTvKPTittBNtVHLVylbHo,4827
|
|
22
|
+
datarobot_moderations-11.1.15.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
23
|
+
datarobot_moderations-11.1.15.dist-info/RECORD,,
|
|
File without changes
|