datarobot-moderations 11.2.10__py3-none-any.whl → 11.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datarobot_dome/__init__.py +2 -2
- datarobot_dome/async_http_client.py +2 -2
- datarobot_dome/chat_helper.py +2 -2
- datarobot_dome/constants.py +29 -4
- datarobot_dome/drum_integration.py +2 -2
- datarobot_dome/guard_executor.py +130 -17
- datarobot_dome/guard_factory.py +126 -0
- datarobot_dome/guard_helpers.py +17 -2
- datarobot_dome/guards/__init__.py +17 -2
- datarobot_dome/guards/base.py +259 -0
- datarobot_dome/guards/guard_llm_mixin.py +4 -2
- datarobot_dome/guards/model_guard.py +84 -0
- datarobot_dome/guards/nemo_evaluator.py +329 -0
- datarobot_dome/guards/nemo_guard.py +148 -0
- datarobot_dome/guards/ootb_guard.py +209 -0
- datarobot_dome/guards/validation.py +234 -0
- datarobot_dome/llm.py +2 -2
- datarobot_dome/metrics/__init__.py +2 -2
- datarobot_dome/metrics/citation_metrics.py +2 -2
- datarobot_dome/metrics/factory.py +2 -2
- datarobot_dome/metrics/metric_scorer.py +2 -2
- datarobot_dome/pipeline/__init__.py +2 -2
- datarobot_dome/pipeline/llm_pipeline.py +4 -4
- datarobot_dome/pipeline/pipeline.py +2 -2
- datarobot_dome/pipeline/vdb_pipeline.py +2 -2
- datarobot_dome/runtime.py +2 -2
- datarobot_dome/streaming.py +3 -3
- {datarobot_moderations-11.2.10.dist-info → datarobot_moderations-11.2.12.dist-info}/METADATA +3 -2
- datarobot_moderations-11.2.12.dist-info/RECORD +30 -0
- {datarobot_moderations-11.2.10.dist-info → datarobot_moderations-11.2.12.dist-info}/WHEEL +1 -1
- datarobot_dome/guard.py +0 -845
- datarobot_moderations-11.2.10.dist-info/RECORD +0 -24
datarobot_dome/__init__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
3
|
-
# Last updated
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
|
+
# Last updated 2026.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
|
6
6
|
# This is proprietary source code of DataRobot, Inc. and its affiliates.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
3
|
-
# Last updated
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
|
+
# Last updated 2026.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
|
6
6
|
# This is proprietary source code of DataRobot, Inc. and its affiliates.
|
datarobot_dome/chat_helper.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
3
|
-
# Last updated
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
|
+
# Last updated 2026.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
|
6
6
|
# This is proprietary source code of DataRobot, Inc. and its affiliates.
|
datarobot_dome/constants.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
3
|
-
# Last updated
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
|
+
# Last updated 2026.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
|
6
6
|
# This is proprietary source code of DataRobot, Inc. and its affiliates.
|
|
@@ -31,6 +31,7 @@ COST_COLUMN_NAME = "cost"
|
|
|
31
31
|
FAITHFULLNESS_COLUMN_NAME = "faithfulness"
|
|
32
32
|
AGENT_GOAL_ACCURACY_COLUMN_NAME = "agent_goal_accuracy"
|
|
33
33
|
TASK_ADHERENCE_SCORE_COLUMN_NAME = "task_adherence_score"
|
|
34
|
+
GUIDELINE_ADHERENCE_COLUMN_NAME = "agent_guideline_adherence_score"
|
|
34
35
|
|
|
35
36
|
CUSTOM_METRIC_DESCRIPTION_SUFFIX = "Created by DataRobot Moderation System"
|
|
36
37
|
|
|
@@ -105,8 +106,9 @@ class GuardType:
|
|
|
105
106
|
OOTB = "ootb" # Out of the Box
|
|
106
107
|
MODEL = "model" # wraps a deployed model
|
|
107
108
|
NEMO_GUARDRAILS = "nemo_guardrails" # Nemo guardrails
|
|
109
|
+
NEMO_EVALUATOR = "nemo_evaluator" # Nemo evaluator
|
|
108
110
|
|
|
109
|
-
ALL = [MODEL, NEMO_GUARDRAILS,
|
|
111
|
+
ALL = [OOTB, MODEL, NEMO_GUARDRAILS, NEMO_EVALUATOR]
|
|
110
112
|
|
|
111
113
|
|
|
112
114
|
class OOTBType:
|
|
@@ -117,6 +119,7 @@ class OOTBType:
|
|
|
117
119
|
CUSTOM_METRIC = "custom_metric"
|
|
118
120
|
COST = "cost"
|
|
119
121
|
TASK_ADHERENCE = "task_adherence"
|
|
122
|
+
GUIDELINE_ADHERENCE = "agent_guideline_adherence"
|
|
120
123
|
|
|
121
124
|
ALL = [
|
|
122
125
|
TOKEN_COUNT,
|
|
@@ -126,6 +129,27 @@ class OOTBType:
|
|
|
126
129
|
COST,
|
|
127
130
|
AGENT_GOAL_ACCURACY,
|
|
128
131
|
TASK_ADHERENCE,
|
|
132
|
+
GUIDELINE_ADHERENCE,
|
|
133
|
+
]
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class NemoEvaluatorType:
    """String identifiers for the supported NeMo evaluator guard kinds.

    ``ALL`` collects every identifier declared on this class, in declaration
    order, so callers can validate a configured value against it.
    """

    # Individual evaluator identifiers.
    LLM_JUDGE = "llm_judge"
    CONTEXT_RELEVANCE = "context_relevance"
    RESPONSE_GROUNDEDNESS = "response_groundedness"
    TOPIC_ADHERENCE = "topic_adherence"
    AGENT_GOAL_ACCURACY = "agent_goal_accuracy"
    RESPONSE_RELEVANCY = "response_relevancy"
    FAITHFULNESS = "faithfulness"

    # Every identifier above, in declaration order.
    ALL = [
        LLM_JUDGE, CONTEXT_RELEVANCE, RESPONSE_GROUNDEDNESS, TOPIC_ADHERENCE,
        AGENT_GOAL_ACCURACY, RESPONSE_RELEVANCY, FAITHFULNESS,
    ]
|
|
130
154
|
|
|
131
155
|
|
|
@@ -230,8 +254,9 @@ class GuardLLMType:
|
|
|
230
254
|
AMAZON = "amazon"
|
|
231
255
|
DATAROBOT = "datarobot"
|
|
232
256
|
NIM = "nim"
|
|
257
|
+
LLM_GATEWAY = "llmGateway"
|
|
233
258
|
|
|
234
|
-
ALL = [OPENAI, AZURE_OPENAI, GOOGLE, AMAZON, DATAROBOT, NIM]
|
|
259
|
+
ALL = [OPENAI, AZURE_OPENAI, GOOGLE, AMAZON, DATAROBOT, NIM, LLM_GATEWAY]
|
|
235
260
|
|
|
236
261
|
|
|
237
262
|
class GoogleModel:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
3
|
-
# Last updated
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
|
+
# Last updated 2026.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
|
6
6
|
# This is proprietary source code of DataRobot, Inc. and its affiliates.
|
datarobot_dome/guard_executor.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
3
|
-
# Last updated
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
|
+
# Last updated 2026.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
|
6
6
|
# This is proprietary source code of DataRobot, Inc. and its affiliates.
|
|
@@ -31,15 +31,10 @@ from datarobot_dome.constants import GuardStage
|
|
|
31
31
|
from datarobot_dome.constants import GuardTimeoutAction
|
|
32
32
|
from datarobot_dome.constants import GuardType
|
|
33
33
|
from datarobot_dome.constants import ModerationEventTypes
|
|
34
|
+
from datarobot_dome.constants import NemoEvaluatorType
|
|
34
35
|
from datarobot_dome.constants import OOTBType
|
|
35
|
-
from datarobot_dome.guard import AgentGoalAccuracyGuard
|
|
36
|
-
from datarobot_dome.guard import FaithfulnessGuard
|
|
37
|
-
from datarobot_dome.guard import ModelGuard
|
|
38
|
-
from datarobot_dome.guard import NeMoGuard
|
|
39
|
-
from datarobot_dome.guard import OOTBCostMetric
|
|
40
|
-
from datarobot_dome.guard import OOTBGuard
|
|
41
|
-
from datarobot_dome.guard import TaskAdherenceGuard
|
|
42
36
|
from datarobot_dome.guard_helpers import calculate_agent_goal_accuracy
|
|
37
|
+
from datarobot_dome.guard_helpers import calculate_agent_guideline_adherence
|
|
43
38
|
from datarobot_dome.guard_helpers import calculate_faithfulness
|
|
44
39
|
from datarobot_dome.guard_helpers import calculate_task_adherence
|
|
45
40
|
from datarobot_dome.guard_helpers import calculate_token_counts_for_cost_calculations
|
|
@@ -48,6 +43,15 @@ from datarobot_dome.guard_helpers import get_rouge_1_score
|
|
|
48
43
|
from datarobot_dome.guard_helpers import get_token_count
|
|
49
44
|
from datarobot_dome.guard_helpers import nemo_response_stage_input_formatter
|
|
50
45
|
from datarobot_dome.guard_helpers import nemo_response_stage_output_formatter
|
|
46
|
+
from datarobot_dome.guards import ModelGuard
|
|
47
|
+
from datarobot_dome.guards import NeMoEvaluatorGuard
|
|
48
|
+
from datarobot_dome.guards import NeMoGuard
|
|
49
|
+
from datarobot_dome.guards import OOTBAgentGoalAccuracyGuard
|
|
50
|
+
from datarobot_dome.guards import OOTBCostMetric
|
|
51
|
+
from datarobot_dome.guards import OOTBFaithfulnessGuard
|
|
52
|
+
from datarobot_dome.guards import OOTBGuard
|
|
53
|
+
from datarobot_dome.guards import OOTBTaskAdherenceGuard
|
|
54
|
+
from datarobot_dome.guards.ootb_guard import OOTBAgentGuidelineAdherence
|
|
51
55
|
|
|
52
56
|
tracer = trace.get_tracer(__name__)
|
|
53
57
|
|
|
@@ -86,6 +90,7 @@ class AsyncGuardExecutor:
|
|
|
86
90
|
GuardType.MODEL: "run_model_guard",
|
|
87
91
|
GuardType.OOTB: "run_ootb_guard",
|
|
88
92
|
GuardType.NEMO_GUARDRAILS: "run_nemo_guard",
|
|
93
|
+
GuardType.NEMO_EVALUATOR: "run_nemo_evaluator_guard",
|
|
89
94
|
}
|
|
90
95
|
|
|
91
96
|
def __init__(self, pipeline):
|
|
@@ -173,7 +178,7 @@ class AsyncGuardExecutor:
|
|
|
173
178
|
metric_column = guard.model_info.target_name
|
|
174
179
|
|
|
175
180
|
llm_input_column = self.pipeline.get_input_column(stage)
|
|
176
|
-
guard_input_column = guard.
|
|
181
|
+
guard_input_column = guard.get_input_column_name(stage)
|
|
177
182
|
|
|
178
183
|
intervene = self._should_intervene(guard)
|
|
179
184
|
try:
|
|
@@ -335,9 +340,9 @@ class AsyncGuardExecutor:
|
|
|
335
340
|
return self.intervene(guard, copy_df, input_column, metric_column)
|
|
336
341
|
|
|
337
342
|
async def _handle_faithfulness(self, guard, copy_df, stage, intervene):
|
|
338
|
-
if not isinstance(guard,
|
|
343
|
+
if not isinstance(guard, OOTBFaithfulnessGuard):
|
|
339
344
|
raise ValueError(
|
|
340
|
-
f"Guard object should be of type
|
|
345
|
+
f"Guard object should be of type OOTBFaithfulnessGuard, got: {type(guard)}"
|
|
341
346
|
)
|
|
342
347
|
if stage == GuardStage.PROMPT:
|
|
343
348
|
raise ValueError("Faithfulness only supports evaluating the response")
|
|
@@ -387,9 +392,9 @@ class AsyncGuardExecutor:
|
|
|
387
392
|
return copy_df, intervene
|
|
388
393
|
|
|
389
394
|
async def _handle_agent_goal_accuracy(self, guard, copy_df, stage):
|
|
390
|
-
if not isinstance(guard,
|
|
395
|
+
if not isinstance(guard, OOTBAgentGoalAccuracyGuard):
|
|
391
396
|
raise ValueError(
|
|
392
|
-
f"Guard object should be of type
|
|
397
|
+
f"Guard object should be of type OOTBAgentGoalAccuracyGuard, got: {type(guard)}"
|
|
393
398
|
)
|
|
394
399
|
if stage == GuardStage.PROMPT:
|
|
395
400
|
raise ValueError("Agent Goal Accuracy only supports evaluating the response")
|
|
@@ -434,9 +439,9 @@ class AsyncGuardExecutor:
|
|
|
434
439
|
return copy_df
|
|
435
440
|
|
|
436
441
|
async def _handle_task_adherence(self, guard, copy_df, stage):
|
|
437
|
-
if not isinstance(guard,
|
|
442
|
+
if not isinstance(guard, OOTBTaskAdherenceGuard):
|
|
438
443
|
raise ValueError(
|
|
439
|
-
f"Guard object should be of type
|
|
444
|
+
f"Guard object should be of type OOTBTaskAdherenceGuard, got: {type(guard)}"
|
|
440
445
|
)
|
|
441
446
|
if stage == GuardStage.PROMPT:
|
|
442
447
|
raise ValueError("Task Adherence only supports evaluating the response")
|
|
@@ -511,6 +516,46 @@ class AsyncGuardExecutor:
|
|
|
511
516
|
)
|
|
512
517
|
return copy_df, intervene
|
|
513
518
|
|
|
519
|
+
async def _handle_guideline_adherence(self, guard, copy_df, stage):
    """Compute the guideline-adherence metric for every row of ``copy_df``.

    The score for each row is produced by ``calculate_agent_guideline_adherence``
    from the pipeline's prompt column, its response column and, when the frame
    has citation columns, the row's citation values.

    Args:
        guard: Must be an ``OOTBAgentGuidelineAdherence`` instance; its
            ``guideline_adherence_scorer`` and ``metric_column_name`` are used.
        copy_df: DataFrame holding the prompt / response (and optional
            citation) columns; the metric column is written into it.
        stage: Must be ``GuardStage.RESPONSE``.

    Returns:
        ``copy_df``. If scoring raised, the error is logged and reported as a
        moderation event, and the frame is returned without the metric column
        having been assigned by this call.

    Raises:
        ValueError: If ``guard`` has the wrong type or ``stage`` is not the
            response stage.
    """
    if not isinstance(guard, OOTBAgentGuidelineAdherence):
        # Name the class that is actually checked, matching the sibling handlers.
        raise ValueError(
            f"Guard object should be of type OOTBAgentGuidelineAdherence, got: {type(guard)}"
        )

    if stage != GuardStage.RESPONSE:
        raise ValueError("Guideline Adherence only supports evaluating the response")

    citation_columns = get_citation_columns(copy_df.columns)
    prompt_column_name = self.pipeline.get_input_column(GuardStage.PROMPT)
    response_column_name = self.pipeline.get_input_column(GuardStage.RESPONSE)
    metric_column_name = guard.metric_column_name

    def _score_row(row):
        # Pass None (not an empty list) when the frame has no citation columns.
        citations = (
            [row[col] for col in citation_columns] if len(citation_columns) > 0 else None
        )
        return calculate_agent_guideline_adherence(
            scorer=guard.guideline_adherence_scorer,
            prompt=row[prompt_column_name],
            response=row[response_column_name],
            citations=citations,
        )

    try:
        copy_df[metric_column_name] = copy_df.apply(_score_row, axis=1)
    except Exception as e:
        # Best-effort metric: report the failure instead of failing the request.
        title = "Guideline Adherence calculation failed"
        message = f"Exception: {e}"
        self._logger.error(title + " " + message)
        self._logger.error(traceback.format_exc())
        await self.pipeline.send_event_async(
            title,
            message,
            ModerationEventTypes.MODERATION_MODEL_RUNTIME_ERROR,
            guard_name=guard.name,
        )

    return copy_df
|
|
558
|
+
|
|
514
559
|
async def _handle_cost(self, guard, copy_df, stage):
|
|
515
560
|
if not isinstance(guard, OOTBCostMetric):
|
|
516
561
|
raise ValueError(f"Guard object should be of type OOTBCostMetric, got: {type(guard)}")
|
|
@@ -560,6 +605,10 @@ class AsyncGuardExecutor:
|
|
|
560
605
|
copy_df = await self._handle_task_adherence(guard, copy_df, stage)
|
|
561
606
|
# No intervention for task adherence
|
|
562
607
|
intervene = False
|
|
608
|
+
elif guard.ootb_type == OOTBType.GUIDELINE_ADHERENCE:
|
|
609
|
+
copy_df = await self._handle_guideline_adherence(guard, copy_df, stage)
|
|
610
|
+
# No intervention for guideline adherence
|
|
611
|
+
intervene = False
|
|
563
612
|
elif guard.ootb_type == OOTBType.CUSTOM_METRIC:
|
|
564
613
|
body = {
|
|
565
614
|
"df": copy_df.to_dict(),
|
|
@@ -634,6 +683,62 @@ class AsyncGuardExecutor:
|
|
|
634
683
|
copy_df = self._dont_intervene(guard, copy_df, stage)
|
|
635
684
|
return copy_df
|
|
636
685
|
|
|
686
|
+
async def run_nemo_evaluator_guard(self, guard: NeMoEvaluatorGuard, copy_df, stage):
    """Evaluate every row of ``copy_df`` with a NeMo evaluator guard.

    One ``guard.evaluate(...)`` awaitable is created per row and all of them
    are awaited together; the results are stored in the guard's metric column.
    Afterwards the frame goes through the intervene / don't-intervene path.

    Args:
        guard: The ``NeMoEvaluatorGuard`` to run.
        copy_df: DataFrame with the prompt / response / citation columns.
        stage: Guard stage being processed.

    Returns:
        The DataFrame returned by the intervention (or non-intervention) step.

    Raises:
        ValueError: If ``guard`` is not a ``NeMoEvaluatorGuard``, if a
            non-LLM-judge evaluator is used on the prompt stage, or if an
            evaluator that needs citations is given a frame without any
            citation columns.
    """
    if not isinstance(guard, NeMoEvaluatorGuard):
        raise ValueError(
            f"Guard object should be of type NeMoEvaluatorGuard, got: {type(guard)}"
        )

    evaluator_type = guard.nemo_evaluator_type
    if stage == GuardStage.PROMPT and evaluator_type != NemoEvaluatorType.LLM_JUDGE:
        raise ValueError(
            f"NeMoEvaluatorGuard of type {evaluator_type} "
            f"can only be applied to response stage"
        )

    # Resolve the columns each evaluation reads from.
    prompt_column_name = self.pipeline.get_input_column(GuardStage.PROMPT)
    response_column_name = self.pipeline.get_input_column(GuardStage.RESPONSE)
    citation_column_names = get_citation_columns(copy_df.columns)

    # These evaluator kinds are rejected when no citation column is present.
    citation_dependent_types = (
        NemoEvaluatorType.CONTEXT_RELEVANCE,
        NemoEvaluatorType.RESPONSE_GROUNDEDNESS,
        NemoEvaluatorType.FAITHFULNESS,
    )
    if evaluator_type in citation_dependent_types and not citation_column_names:
        raise ValueError(
            f"NeMoEvaluatorGuard of type {evaluator_type} is configured without "
            f"citation columns. Input Column Names: {', '.join(copy_df.columns.to_list())}."
        )

    # Build one awaitable per row, then await them all; gather keeps row order.
    pending = []
    for record in copy_df.to_dict(orient="records"):
        contexts = [
            record[name] for name in citation_column_names if record.get(name) is not None
        ]
        pending.append(
            guard.evaluate(
                prompt=record.get(prompt_column_name),
                response=record.get(response_column_name),
                retrieved_contexts=contexts,
            )
        )
    scores = await asyncio.gather(*pending)
    copy_df[guard.metric_column_name] = scores

    # Apply or skip the intervention depending on the guard's configuration.
    if not self._should_intervene(guard):
        return self._dont_intervene(guard, copy_df, stage)

    copy_df, _ = self._intervene(guard, copy_df, stage, guard.metric_column_name)
    return copy_df
|
|
741
|
+
|
|
637
742
|
def run_guards(self, input_df, guards, stage):
|
|
638
743
|
start_time = time.time()
|
|
639
744
|
df = self.loop.run_until_complete(self.async_guard_executor(input_df, guards, stage))
|
|
@@ -694,17 +799,25 @@ class AsyncGuardExecutor:
|
|
|
694
799
|
|
|
695
800
|
def _get_input_df_for_the_guard(self, _input_df, join_columns, guard, stage):
    """Select the columns a guard needs and return a deep copy of them.

    For response-stage OOTB guards the citation columns, the prompt column and
    the agentic interactions column are added depending on ``guard.ootb_type``
    (and ``guard.copy_citations``). For response-stage NeMo evaluator guards
    the prompt column and the citation columns are always added.

    Args:
        _input_df: Source DataFrame.
        join_columns: Set of column names to start from.
        guard: Guard the frame is being prepared for.
        stage: Guard stage being processed.

    Returns:
        Tuple ``(copy_df, join_columns)``: a deep copy of the selected columns
        and the (possibly extended) column set.
    """
    # NOTE(review): ``.add`` below mutates the set passed in by the caller
    # whenever no union has rebound ``join_columns`` first - confirm callers
    # expect that.
    on_response_stage = stage == GuardStage.RESPONSE

    if on_response_stage and isinstance(guard, OOTBGuard):
        citation_ootb_types = [
            OOTBType.ROUGE_1,
            OOTBType.FAITHFULNESS,
            OOTBType.GUIDELINE_ADHERENCE,
        ]
        if guard.ootb_type in citation_ootb_types or guard.copy_citations:
            join_columns = join_columns | set(get_citation_columns(_input_df.columns))

        prompt_ootb_types = [
            OOTBType.FAITHFULNESS,
            OOTBType.COST,
            OOTBType.AGENT_GOAL_ACCURACY,
            OOTBType.TASK_ADHERENCE,
            OOTBType.GUIDELINE_ADHERENCE,
        ]
        if guard.ootb_type in prompt_ootb_types:
            join_columns.add(self.pipeline.get_input_column(GuardStage.PROMPT))

        agentic_ootb_types = [OOTBType.AGENT_GOAL_ACCURACY, OOTBType.TASK_ADHERENCE]
        if guard.ootb_type in agentic_ootb_types:
            join_columns.add(AGENTIC_PIPELINE_INTERACTIONS_ATTR)
    elif on_response_stage and isinstance(guard, NeMoEvaluatorGuard):
        join_columns.add(self.pipeline.get_input_column(GuardStage.PROMPT))
        join_columns = join_columns | set(get_citation_columns(_input_df.columns))

    selected = list(join_columns)
    copy_df = _input_df[selected].copy(deep=True)
    return copy_df, join_columns
|
|
710
823
|
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# ---------------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
|
+
# Last updated 2026.
|
|
4
|
+
#
|
|
5
|
+
# DataRobot, Inc. Confidential.
|
|
6
|
+
# This is proprietary source code of DataRobot, Inc. and its affiliates.
|
|
7
|
+
#
|
|
8
|
+
# This file and its contents are subject to DataRobot Tool and Utility Agreement.
|
|
9
|
+
# For details, see
|
|
10
|
+
# https://www.datarobot.com/wp-content/uploads/2021/07/DataRobot-Tool-and-Utility-Agreement.pdf.
|
|
11
|
+
# ---------------------------------------------------------------------------------
|
|
12
|
+
import os
|
|
13
|
+
|
|
14
|
+
from datarobot_dome.constants import GuardAction
|
|
15
|
+
from datarobot_dome.constants import GuardOperatorType
|
|
16
|
+
from datarobot_dome.constants import GuardType
|
|
17
|
+
from datarobot_dome.constants import NemoEvaluatorType
|
|
18
|
+
from datarobot_dome.constants import OOTBType
|
|
19
|
+
from datarobot_dome.guards import ModelGuard
|
|
20
|
+
from datarobot_dome.guards import NeMoAgentGoalAccuracyGuard
|
|
21
|
+
from datarobot_dome.guards import NeMoContextRelevanceGuard
|
|
22
|
+
from datarobot_dome.guards import NeMoFaithfulnessGuard
|
|
23
|
+
from datarobot_dome.guards import NeMoGuard
|
|
24
|
+
from datarobot_dome.guards import NeMoLLMJudgeGuard
|
|
25
|
+
from datarobot_dome.guards import NeMoResponseGroundednessGuard
|
|
26
|
+
from datarobot_dome.guards import NeMoResponseRelevancyGuard
|
|
27
|
+
from datarobot_dome.guards import NeMoTopicAdherenceGuard
|
|
28
|
+
from datarobot_dome.guards import OOTBAgentGoalAccuracyGuard
|
|
29
|
+
from datarobot_dome.guards import OOTBCostMetric
|
|
30
|
+
from datarobot_dome.guards import OOTBFaithfulnessGuard
|
|
31
|
+
from datarobot_dome.guards import OOTBGuard
|
|
32
|
+
from datarobot_dome.guards import OOTBTaskAdherenceGuard
|
|
33
|
+
from datarobot_dome.guards.base import Guard
|
|
34
|
+
from datarobot_dome.guards.ootb_guard import OOTBAgentGuidelineAdherence
|
|
35
|
+
from datarobot_dome.guards.validation import guard_trafaret
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class GuardFactory:
|
|
39
|
+
@classmethod
|
|
40
|
+
def _perform_post_validation_checks(cls, guard_config):
|
|
41
|
+
if not guard_config.get("intervention"):
|
|
42
|
+
return
|
|
43
|
+
|
|
44
|
+
if guard_config["intervention"]["action"] == GuardAction.BLOCK and (
|
|
45
|
+
guard_config["intervention"]["message"] is None
|
|
46
|
+
or len(guard_config["intervention"]["message"]) == 0
|
|
47
|
+
):
|
|
48
|
+
raise ValueError("Blocked action needs a blocking message")
|
|
49
|
+
|
|
50
|
+
if guard_config["intervention"]["action"] == GuardAction.REPLACE:
|
|
51
|
+
if "model_info" not in guard_config:
|
|
52
|
+
raise ValueError("'Replace' action needs model_info section")
|
|
53
|
+
if (
|
|
54
|
+
"replacement_text_column_name" not in guard_config["model_info"]
|
|
55
|
+
or guard_config["model_info"]["replacement_text_column_name"] is None
|
|
56
|
+
or len(guard_config["model_info"]["replacement_text_column_name"]) == 0
|
|
57
|
+
):
|
|
58
|
+
raise ValueError(
|
|
59
|
+
"'Replace' action needs valid 'replacement_text_column_name' "
|
|
60
|
+
"in 'model_info' section of the guard"
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
if not guard_config["intervention"].get("conditions"):
|
|
64
|
+
return
|
|
65
|
+
|
|
66
|
+
if len(guard_config["intervention"]["conditions"]) == 0:
|
|
67
|
+
return
|
|
68
|
+
|
|
69
|
+
condition = guard_config["intervention"]["conditions"][0]
|
|
70
|
+
if condition["comparator"] in GuardOperatorType.REQUIRES_LIST_COMPARAND:
|
|
71
|
+
if not isinstance(condition["comparand"], list):
|
|
72
|
+
raise ValueError(
|
|
73
|
+
f"Comparand needs to be a list with {condition['comparator']} comparator"
|
|
74
|
+
)
|
|
75
|
+
elif isinstance(condition["comparand"], list):
|
|
76
|
+
raise ValueError(
|
|
77
|
+
f"Comparand needs to be a scalar with {condition['comparator']} comparator"
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
@staticmethod
|
|
81
|
+
def create(input_config: dict, stage=None, model_dir: str = os.getcwd()) -> Guard:
|
|
82
|
+
config = guard_trafaret.check(input_config)
|
|
83
|
+
|
|
84
|
+
GuardFactory._perform_post_validation_checks(config)
|
|
85
|
+
|
|
86
|
+
if config["type"] == GuardType.MODEL:
|
|
87
|
+
guard = ModelGuard(config, stage)
|
|
88
|
+
elif config["type"] == GuardType.OOTB:
|
|
89
|
+
if config["ootb_type"] == OOTBType.FAITHFULNESS:
|
|
90
|
+
guard = OOTBFaithfulnessGuard(config, stage)
|
|
91
|
+
elif config["ootb_type"] == OOTBType.COST:
|
|
92
|
+
guard = OOTBCostMetric(config, stage)
|
|
93
|
+
elif config["ootb_type"] == OOTBType.AGENT_GOAL_ACCURACY:
|
|
94
|
+
guard = OOTBAgentGoalAccuracyGuard(config, stage)
|
|
95
|
+
elif config["ootb_type"] == OOTBType.TASK_ADHERENCE:
|
|
96
|
+
guard = OOTBTaskAdherenceGuard(config, stage)
|
|
97
|
+
elif config["ootb_type"] == OOTBType.GUIDELINE_ADHERENCE:
|
|
98
|
+
guard = OOTBAgentGuidelineAdherence(config, stage)
|
|
99
|
+
else:
|
|
100
|
+
guard = OOTBGuard(config, stage)
|
|
101
|
+
elif config["type"] == GuardType.NEMO_GUARDRAILS:
|
|
102
|
+
guard = NeMoGuard(config, stage, model_dir)
|
|
103
|
+
elif config["type"] == GuardType.NEMO_EVALUATOR:
|
|
104
|
+
match config["nemo_evaluator_type"]:
|
|
105
|
+
case NemoEvaluatorType.LLM_JUDGE:
|
|
106
|
+
guard = NeMoLLMJudgeGuard(config, stage)
|
|
107
|
+
case NemoEvaluatorType.CONTEXT_RELEVANCE:
|
|
108
|
+
guard = NeMoContextRelevanceGuard(config, stage)
|
|
109
|
+
case NemoEvaluatorType.RESPONSE_GROUNDEDNESS:
|
|
110
|
+
guard = NeMoResponseGroundednessGuard(config, stage)
|
|
111
|
+
case NemoEvaluatorType.TOPIC_ADHERENCE:
|
|
112
|
+
guard = NeMoTopicAdherenceGuard(config, stage)
|
|
113
|
+
case NemoEvaluatorType.AGENT_GOAL_ACCURACY:
|
|
114
|
+
guard = NeMoAgentGoalAccuracyGuard(config, stage)
|
|
115
|
+
case NemoEvaluatorType.RESPONSE_RELEVANCY:
|
|
116
|
+
guard = NeMoResponseRelevancyGuard(config, stage)
|
|
117
|
+
case NemoEvaluatorType.FAITHFULNESS:
|
|
118
|
+
guard = NeMoFaithfulnessGuard(config, stage)
|
|
119
|
+
case _:
|
|
120
|
+
raise ValueError(
|
|
121
|
+
f"Invalid guard type: {config['type']} - {config['nemo_evaluator_type']}"
|
|
122
|
+
)
|
|
123
|
+
else:
|
|
124
|
+
raise ValueError(f"Invalid guard type: {config['type']}")
|
|
125
|
+
|
|
126
|
+
return guard
|
datarobot_dome/guard_helpers.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
3
|
-
# Last updated
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
|
+
# Last updated 2026.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
|
6
6
|
# This is proprietary source code of DataRobot, Inc. and its affiliates.
|
|
@@ -27,6 +27,7 @@ from langchain_nvidia_ai_endpoints._statics import determine_model
|
|
|
27
27
|
from langchain_openai import AzureChatOpenAI
|
|
28
28
|
from langchain_openai import ChatOpenAI
|
|
29
29
|
from llama_index.core.evaluation import FaithfulnessEvaluator
|
|
30
|
+
from llama_index.core.evaluation import GuidelineEvaluator
|
|
30
31
|
from ragas import MultiTurnSample
|
|
31
32
|
from ragas.messages import AIMessage
|
|
32
33
|
from ragas.messages import HumanMessage
|
|
@@ -434,6 +435,20 @@ def calculate_agent_goal_accuracy(
|
|
|
434
435
|
return scorer.multi_turn_score(sample)
|
|
435
436
|
|
|
436
437
|
|
|
438
|
+
def calculate_agent_guideline_adherence(
|
|
439
|
+
scorer: GuidelineEvaluator,
|
|
440
|
+
prompt: str,
|
|
441
|
+
response: str,
|
|
442
|
+
citations: list[str] | None = None,
|
|
443
|
+
):
|
|
444
|
+
evaluation_results = scorer.evaluate(
|
|
445
|
+
query=prompt,
|
|
446
|
+
context=citations,
|
|
447
|
+
response=response,
|
|
448
|
+
)
|
|
449
|
+
return evaluation_results.passing
|
|
450
|
+
|
|
451
|
+
|
|
437
452
|
class ModerationDeepEvalLLM(DeepEvalBaseLLM):
|
|
438
453
|
def __init__(self, llm, *args, **kwargs):
|
|
439
454
|
self.llm = llm
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
3
|
-
# Last updated
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
|
+
# Last updated 2026.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
|
6
6
|
# This is proprietary source code of DataRobot, Inc. and its affiliates.
|
|
@@ -9,3 +9,18 @@
|
|
|
9
9
|
# For details, see
|
|
10
10
|
# https://www.datarobot.com/wp-content/uploads/2021/07/DataRobot-Tool-and-Utility-Agreement.pdf.
|
|
11
11
|
# ---------------------------------------------------------------------------------
|
|
12
|
+
from .model_guard import ModelGuard
|
|
13
|
+
from .nemo_evaluator import NeMoAgentGoalAccuracyGuard
|
|
14
|
+
from .nemo_evaluator import NeMoContextRelevanceGuard
|
|
15
|
+
from .nemo_evaluator import NeMoEvaluatorGuard
|
|
16
|
+
from .nemo_evaluator import NeMoFaithfulnessGuard
|
|
17
|
+
from .nemo_evaluator import NeMoLLMJudgeGuard
|
|
18
|
+
from .nemo_evaluator import NeMoResponseGroundednessGuard
|
|
19
|
+
from .nemo_evaluator import NeMoResponseRelevancyGuard
|
|
20
|
+
from .nemo_evaluator import NeMoTopicAdherenceGuard
|
|
21
|
+
from .nemo_guard import NeMoGuard
|
|
22
|
+
from .ootb_guard import OOTBAgentGoalAccuracyGuard
|
|
23
|
+
from .ootb_guard import OOTBCostMetric
|
|
24
|
+
from .ootb_guard import OOTBFaithfulnessGuard
|
|
25
|
+
from .ootb_guard import OOTBGuard
|
|
26
|
+
from .ootb_guard import OOTBTaskAdherenceGuard
|