datarobot-moderations 11.2.10__py3-none-any.whl → 11.2.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datarobot_dome/__init__.py +1 -1
- datarobot_dome/async_http_client.py +1 -1
- datarobot_dome/chat_helper.py +1 -1
- datarobot_dome/constants.py +26 -2
- datarobot_dome/drum_integration.py +1 -1
- datarobot_dome/guard_executor.py +67 -16
- datarobot_dome/guard_factory.py +126 -0
- datarobot_dome/guard_helpers.py +16 -1
- datarobot_dome/guards/__init__.py +16 -1
- datarobot_dome/guards/base.py +259 -0
- datarobot_dome/guards/guard_llm_mixin.py +3 -1
- datarobot_dome/guards/model_guard.py +84 -0
- datarobot_dome/guards/nemo_evaluator.py +73 -0
- datarobot_dome/guards/nemo_guard.py +146 -0
- datarobot_dome/guards/ootb_guard.py +209 -0
- datarobot_dome/guards/validation.py +201 -0
- datarobot_dome/llm.py +1 -1
- datarobot_dome/metrics/__init__.py +1 -1
- datarobot_dome/metrics/citation_metrics.py +1 -1
- datarobot_dome/metrics/factory.py +1 -1
- datarobot_dome/metrics/metric_scorer.py +1 -1
- datarobot_dome/pipeline/__init__.py +1 -1
- datarobot_dome/pipeline/llm_pipeline.py +3 -3
- datarobot_dome/pipeline/pipeline.py +1 -1
- datarobot_dome/pipeline/vdb_pipeline.py +1 -1
- datarobot_dome/runtime.py +1 -1
- datarobot_dome/streaming.py +2 -2
- {datarobot_moderations-11.2.10.dist-info → datarobot_moderations-11.2.11.dist-info}/METADATA +2 -1
- datarobot_moderations-11.2.11.dist-info/RECORD +30 -0
- {datarobot_moderations-11.2.10.dist-info → datarobot_moderations-11.2.11.dist-info}/WHEEL +1 -1
- datarobot_dome/guard.py +0 -845
- datarobot_moderations-11.2.10.dist-info/RECORD +0 -24
datarobot_dome/__init__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
3
|
# Last updated 2025.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
datarobot_dome/async_http_client.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
3
|
# Last updated 2025.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
datarobot_dome/chat_helper.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
3
|
# Last updated 2025.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
datarobot_dome/constants.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
3
|
# Last updated 2025.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
|
@@ -31,6 +31,7 @@ COST_COLUMN_NAME = "cost"
|
|
|
31
31
|
FAITHFULLNESS_COLUMN_NAME = "faithfulness"
|
|
32
32
|
AGENT_GOAL_ACCURACY_COLUMN_NAME = "agent_goal_accuracy"
|
|
33
33
|
TASK_ADHERENCE_SCORE_COLUMN_NAME = "task_adherence_score"
|
|
34
|
+
GUIDELINE_ADHERENCE_COLUMN_NAME = "agent_guideline_adherence_score"
|
|
34
35
|
|
|
35
36
|
CUSTOM_METRIC_DESCRIPTION_SUFFIX = "Created by DataRobot Moderation System"
|
|
36
37
|
|
|
@@ -105,8 +106,9 @@ class GuardType:
|
|
|
105
106
|
OOTB = "ootb" # Out of the Box
|
|
106
107
|
MODEL = "model" # wraps a deployed model
|
|
107
108
|
NEMO_GUARDRAILS = "nemo_guardrails" # Nemo guardrails
|
|
109
|
+
NEMO_EVALUATOR = "nemo_evaluator" # Nemo evaluator
|
|
108
110
|
|
|
109
|
-
ALL = [MODEL, NEMO_GUARDRAILS,
|
|
111
|
+
ALL = [OOTB, MODEL, NEMO_GUARDRAILS, NEMO_EVALUATOR]
|
|
110
112
|
|
|
111
113
|
|
|
112
114
|
class OOTBType:
|
|
@@ -117,6 +119,7 @@ class OOTBType:
|
|
|
117
119
|
CUSTOM_METRIC = "custom_metric"
|
|
118
120
|
COST = "cost"
|
|
119
121
|
TASK_ADHERENCE = "task_adherence"
|
|
122
|
+
GUIDELINE_ADHERENCE = "agent_guideline_adherence"
|
|
120
123
|
|
|
121
124
|
ALL = [
|
|
122
125
|
TOKEN_COUNT,
|
|
@@ -126,6 +129,27 @@ class OOTBType:
|
|
|
126
129
|
COST,
|
|
127
130
|
AGENT_GOAL_ACCURACY,
|
|
128
131
|
TASK_ADHERENCE,
|
|
132
|
+
GUIDELINE_ADHERENCE,
|
|
133
|
+
]
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class NemoEvaluatorType:
|
|
137
|
+
LLM_JUDGE = "llm_judge"
|
|
138
|
+
CONTEXT_RELEVANCE = "context_relevance"
|
|
139
|
+
RESPONSE_GROUNDEDNESS = "response_groundedness"
|
|
140
|
+
TOPIC_ADHERENCE = "topic_adherence"
|
|
141
|
+
AGENT_GOAL_ACCURACY = "agent_goal_accuracy"
|
|
142
|
+
RESPONSE_RELEVANCY = "response_relevancy"
|
|
143
|
+
FAITHFULNESS = "faithfulness"
|
|
144
|
+
|
|
145
|
+
ALL = [
|
|
146
|
+
LLM_JUDGE,
|
|
147
|
+
CONTEXT_RELEVANCE,
|
|
148
|
+
RESPONSE_GROUNDEDNESS,
|
|
149
|
+
TOPIC_ADHERENCE,
|
|
150
|
+
AGENT_GOAL_ACCURACY,
|
|
151
|
+
RESPONSE_RELEVANCY,
|
|
152
|
+
FAITHFULNESS,
|
|
129
153
|
]
|
|
130
154
|
|
|
131
155
|
|
datarobot_dome/drum_integration.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
3
|
# Last updated 2025.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
datarobot_dome/guard_executor.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
3
|
# Last updated 2025.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
|
@@ -32,14 +32,8 @@ from datarobot_dome.constants import GuardTimeoutAction
|
|
|
32
32
|
from datarobot_dome.constants import GuardType
|
|
33
33
|
from datarobot_dome.constants import ModerationEventTypes
|
|
34
34
|
from datarobot_dome.constants import OOTBType
|
|
35
|
-
from datarobot_dome.guard import AgentGoalAccuracyGuard
|
|
36
|
-
from datarobot_dome.guard import FaithfulnessGuard
|
|
37
|
-
from datarobot_dome.guard import ModelGuard
|
|
38
|
-
from datarobot_dome.guard import NeMoGuard
|
|
39
|
-
from datarobot_dome.guard import OOTBCostMetric
|
|
40
|
-
from datarobot_dome.guard import OOTBGuard
|
|
41
|
-
from datarobot_dome.guard import TaskAdherenceGuard
|
|
42
35
|
from datarobot_dome.guard_helpers import calculate_agent_goal_accuracy
|
|
36
|
+
from datarobot_dome.guard_helpers import calculate_agent_guideline_adherence
|
|
43
37
|
from datarobot_dome.guard_helpers import calculate_faithfulness
|
|
44
38
|
from datarobot_dome.guard_helpers import calculate_task_adherence
|
|
45
39
|
from datarobot_dome.guard_helpers import calculate_token_counts_for_cost_calculations
|
|
@@ -48,6 +42,14 @@ from datarobot_dome.guard_helpers import get_rouge_1_score
|
|
|
48
42
|
from datarobot_dome.guard_helpers import get_token_count
|
|
49
43
|
from datarobot_dome.guard_helpers import nemo_response_stage_input_formatter
|
|
50
44
|
from datarobot_dome.guard_helpers import nemo_response_stage_output_formatter
|
|
45
|
+
from datarobot_dome.guards import ModelGuard
|
|
46
|
+
from datarobot_dome.guards import NeMoGuard
|
|
47
|
+
from datarobot_dome.guards import OOTBAgentGoalAccuracyGuard
|
|
48
|
+
from datarobot_dome.guards import OOTBCostMetric
|
|
49
|
+
from datarobot_dome.guards import OOTBFaithfulnessGuard
|
|
50
|
+
from datarobot_dome.guards import OOTBGuard
|
|
51
|
+
from datarobot_dome.guards import OOTBTaskAdherenceGuard
|
|
52
|
+
from datarobot_dome.guards.ootb_guard import OOTBAgentGuidelineAdherence
|
|
51
53
|
|
|
52
54
|
tracer = trace.get_tracer(__name__)
|
|
53
55
|
|
|
@@ -173,7 +175,7 @@ class AsyncGuardExecutor:
|
|
|
173
175
|
metric_column = guard.model_info.target_name
|
|
174
176
|
|
|
175
177
|
llm_input_column = self.pipeline.get_input_column(stage)
|
|
176
|
-
guard_input_column = guard.
|
|
178
|
+
guard_input_column = guard.get_input_column_name(stage)
|
|
177
179
|
|
|
178
180
|
intervene = self._should_intervene(guard)
|
|
179
181
|
try:
|
|
@@ -335,9 +337,9 @@ class AsyncGuardExecutor:
|
|
|
335
337
|
return self.intervene(guard, copy_df, input_column, metric_column)
|
|
336
338
|
|
|
337
339
|
async def _handle_faithfulness(self, guard, copy_df, stage, intervene):
|
|
338
|
-
if not isinstance(guard,
|
|
340
|
+
if not isinstance(guard, OOTBFaithfulnessGuard):
|
|
339
341
|
raise ValueError(
|
|
340
|
-
f"Guard object should be of type
|
|
342
|
+
f"Guard object should be of type OOTBFaithfulnessGuard, got: {type(guard)}"
|
|
341
343
|
)
|
|
342
344
|
if stage == GuardStage.PROMPT:
|
|
343
345
|
raise ValueError("Faithfulness only supports evaluating the response")
|
|
@@ -387,9 +389,9 @@ class AsyncGuardExecutor:
|
|
|
387
389
|
return copy_df, intervene
|
|
388
390
|
|
|
389
391
|
async def _handle_agent_goal_accuracy(self, guard, copy_df, stage):
|
|
390
|
-
if not isinstance(guard,
|
|
392
|
+
if not isinstance(guard, OOTBAgentGoalAccuracyGuard):
|
|
391
393
|
raise ValueError(
|
|
392
|
-
f"Guard object should be of type
|
|
394
|
+
f"Guard object should be of type OOTBAgentGoalAccuracyGuard, got: {type(guard)}"
|
|
393
395
|
)
|
|
394
396
|
if stage == GuardStage.PROMPT:
|
|
395
397
|
raise ValueError("Agent Goal Accuracy only supports evaluating the response")
|
|
@@ -434,9 +436,9 @@ class AsyncGuardExecutor:
|
|
|
434
436
|
return copy_df
|
|
435
437
|
|
|
436
438
|
async def _handle_task_adherence(self, guard, copy_df, stage):
|
|
437
|
-
if not isinstance(guard,
|
|
439
|
+
if not isinstance(guard, OOTBTaskAdherenceGuard):
|
|
438
440
|
raise ValueError(
|
|
439
|
-
f"Guard object should be of type
|
|
441
|
+
f"Guard object should be of type OOTBTaskAdherenceGuard, got: {type(guard)}"
|
|
440
442
|
)
|
|
441
443
|
if stage == GuardStage.PROMPT:
|
|
442
444
|
raise ValueError("Task Adherence only supports evaluating the response")
|
|
@@ -511,6 +513,46 @@ class AsyncGuardExecutor:
|
|
|
511
513
|
)
|
|
512
514
|
return copy_df, intervene
|
|
513
515
|
|
|
516
|
+
async def _handle_guideline_adherence(self, guard, copy_df, stage):
|
|
517
|
+
if not isinstance(guard, OOTBAgentGuidelineAdherence):
|
|
518
|
+
raise ValueError(
|
|
519
|
+
f"Guard object should be of type AgentGuidelineAdherence, got: {type(guard)}"
|
|
520
|
+
)
|
|
521
|
+
|
|
522
|
+
if stage != GuardStage.RESPONSE:
|
|
523
|
+
raise ValueError("Guideline Adherence only supports evaluating the response")
|
|
524
|
+
|
|
525
|
+
citation_columns = get_citation_columns(copy_df.columns)
|
|
526
|
+
prompt_column_name = self.pipeline.get_input_column(GuardStage.PROMPT)
|
|
527
|
+
response_column_name = self.pipeline.get_input_column(GuardStage.RESPONSE)
|
|
528
|
+
metric_column_name = guard.metric_column_name
|
|
529
|
+
try:
|
|
530
|
+
copy_df[metric_column_name] = copy_df.apply(
|
|
531
|
+
lambda x: calculate_agent_guideline_adherence(
|
|
532
|
+
scorer=guard.guideline_adherence_scorer,
|
|
533
|
+
prompt=x[prompt_column_name],
|
|
534
|
+
response=x[response_column_name],
|
|
535
|
+
citations=[x[col] for col in citation_columns]
|
|
536
|
+
if len(citation_columns) > 0
|
|
537
|
+
else None,
|
|
538
|
+
),
|
|
539
|
+
axis=1,
|
|
540
|
+
)
|
|
541
|
+
|
|
542
|
+
except Exception as e:
|
|
543
|
+
title = "Guideline Adherence calculation failed"
|
|
544
|
+
message = f"Exception: {e}"
|
|
545
|
+
self._logger.error(title + " " + message)
|
|
546
|
+
self._logger.error(traceback.format_exc())
|
|
547
|
+
await self.pipeline.send_event_async(
|
|
548
|
+
title,
|
|
549
|
+
message,
|
|
550
|
+
ModerationEventTypes.MODERATION_MODEL_RUNTIME_ERROR,
|
|
551
|
+
guard_name=guard.name,
|
|
552
|
+
)
|
|
553
|
+
|
|
554
|
+
return copy_df
|
|
555
|
+
|
|
514
556
|
async def _handle_cost(self, guard, copy_df, stage):
|
|
515
557
|
if not isinstance(guard, OOTBCostMetric):
|
|
516
558
|
raise ValueError(f"Guard object should be of type OOTBCostMetric, got: {type(guard)}")
|
|
@@ -560,6 +602,10 @@ class AsyncGuardExecutor:
|
|
|
560
602
|
copy_df = await self._handle_task_adherence(guard, copy_df, stage)
|
|
561
603
|
# No intervention for task adherence
|
|
562
604
|
intervene = False
|
|
605
|
+
elif guard.ootb_type == OOTBType.GUIDELINE_ADHERENCE:
|
|
606
|
+
copy_df = await self._handle_guideline_adherence(guard, copy_df, stage)
|
|
607
|
+
# No intervention for guideline adherence
|
|
608
|
+
intervene = False
|
|
563
609
|
elif guard.ootb_type == OOTBType.CUSTOM_METRIC:
|
|
564
610
|
body = {
|
|
565
611
|
"df": copy_df.to_dict(),
|
|
@@ -694,13 +740,18 @@ class AsyncGuardExecutor:
|
|
|
694
740
|
|
|
695
741
|
def _get_input_df_for_the_guard(self, _input_df, join_columns, guard, stage):
|
|
696
742
|
if stage == GuardStage.RESPONSE and isinstance(guard, OOTBGuard):
|
|
697
|
-
if
|
|
743
|
+
if (
|
|
744
|
+
guard.ootb_type
|
|
745
|
+
in [OOTBType.ROUGE_1, OOTBType.FAITHFULNESS, OOTBType.GUIDELINE_ADHERENCE]
|
|
746
|
+
or guard.copy_citations
|
|
747
|
+
):
|
|
698
748
|
join_columns = join_columns.union(set(get_citation_columns(_input_df.columns)))
|
|
699
749
|
if guard.ootb_type in [
|
|
700
750
|
OOTBType.FAITHFULNESS,
|
|
701
751
|
OOTBType.COST,
|
|
702
752
|
OOTBType.AGENT_GOAL_ACCURACY,
|
|
703
753
|
OOTBType.TASK_ADHERENCE,
|
|
754
|
+
OOTBType.GUIDELINE_ADHERENCE,
|
|
704
755
|
]:
|
|
705
756
|
join_columns.add(self.pipeline.get_input_column(GuardStage.PROMPT))
|
|
706
757
|
if guard.ootb_type in [OOTBType.AGENT_GOAL_ACCURACY, OOTBType.TASK_ADHERENCE]:
|
datarobot_dome/guard_factory.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# ---------------------------------------------------------------------------------
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
|
+
# Last updated 2025.
|
|
4
|
+
#
|
|
5
|
+
# DataRobot, Inc. Confidential.
|
|
6
|
+
# This is proprietary source code of DataRobot, Inc. and its affiliates.
|
|
7
|
+
#
|
|
8
|
+
# This file and its contents are subject to DataRobot Tool and Utility Agreement.
|
|
9
|
+
# For details, see
|
|
10
|
+
# https://www.datarobot.com/wp-content/uploads/2021/07/DataRobot-Tool-and-Utility-Agreement.pdf.
|
|
11
|
+
# ---------------------------------------------------------------------------------
|
|
12
|
+
import os
|
|
13
|
+
|
|
14
|
+
from datarobot_dome.constants import GuardAction
|
|
15
|
+
from datarobot_dome.constants import GuardOperatorType
|
|
16
|
+
from datarobot_dome.constants import GuardType
|
|
17
|
+
from datarobot_dome.constants import NemoEvaluatorType
|
|
18
|
+
from datarobot_dome.constants import OOTBType
|
|
19
|
+
from datarobot_dome.guards import ModelGuard
|
|
20
|
+
from datarobot_dome.guards import NeMoAgentGoalAccuracyGuard
|
|
21
|
+
from datarobot_dome.guards import NeMoContextRelevanceGuard
|
|
22
|
+
from datarobot_dome.guards import NeMoFaithfulnessGuard
|
|
23
|
+
from datarobot_dome.guards import NeMoGuard
|
|
24
|
+
from datarobot_dome.guards import NeMoLLMJudgeGuard
|
|
25
|
+
from datarobot_dome.guards import NeMoResponseGroundednessGuard
|
|
26
|
+
from datarobot_dome.guards import NeMoResponseRelevancyGuard
|
|
27
|
+
from datarobot_dome.guards import NeMoTopicAdherenceGuard
|
|
28
|
+
from datarobot_dome.guards import OOTBAgentGoalAccuracyGuard
|
|
29
|
+
from datarobot_dome.guards import OOTBCostMetric
|
|
30
|
+
from datarobot_dome.guards import OOTBFaithfulnessGuard
|
|
31
|
+
from datarobot_dome.guards import OOTBGuard
|
|
32
|
+
from datarobot_dome.guards import OOTBTaskAdherenceGuard
|
|
33
|
+
from datarobot_dome.guards.base import Guard
|
|
34
|
+
from datarobot_dome.guards.ootb_guard import OOTBAgentGuidelineAdherence
|
|
35
|
+
from datarobot_dome.guards.validation import guard_trafaret
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class GuardFactory:
|
|
39
|
+
@classmethod
|
|
40
|
+
def _perform_post_validation_checks(cls, guard_config):
|
|
41
|
+
if not guard_config.get("intervention"):
|
|
42
|
+
return
|
|
43
|
+
|
|
44
|
+
if guard_config["intervention"]["action"] == GuardAction.BLOCK and (
|
|
45
|
+
guard_config["intervention"]["message"] is None
|
|
46
|
+
or len(guard_config["intervention"]["message"]) == 0
|
|
47
|
+
):
|
|
48
|
+
raise ValueError("Blocked action needs a blocking message")
|
|
49
|
+
|
|
50
|
+
if guard_config["intervention"]["action"] == GuardAction.REPLACE:
|
|
51
|
+
if "model_info" not in guard_config:
|
|
52
|
+
raise ValueError("'Replace' action needs model_info section")
|
|
53
|
+
if (
|
|
54
|
+
"replacement_text_column_name" not in guard_config["model_info"]
|
|
55
|
+
or guard_config["model_info"]["replacement_text_column_name"] is None
|
|
56
|
+
or len(guard_config["model_info"]["replacement_text_column_name"]) == 0
|
|
57
|
+
):
|
|
58
|
+
raise ValueError(
|
|
59
|
+
"'Replace' action needs valid 'replacement_text_column_name' "
|
|
60
|
+
"in 'model_info' section of the guard"
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
if not guard_config["intervention"].get("conditions"):
|
|
64
|
+
return
|
|
65
|
+
|
|
66
|
+
if len(guard_config["intervention"]["conditions"]) == 0:
|
|
67
|
+
return
|
|
68
|
+
|
|
69
|
+
condition = guard_config["intervention"]["conditions"][0]
|
|
70
|
+
if condition["comparator"] in GuardOperatorType.REQUIRES_LIST_COMPARAND:
|
|
71
|
+
if not isinstance(condition["comparand"], list):
|
|
72
|
+
raise ValueError(
|
|
73
|
+
f"Comparand needs to be a list with {condition['comparator']} comparator"
|
|
74
|
+
)
|
|
75
|
+
elif isinstance(condition["comparand"], list):
|
|
76
|
+
raise ValueError(
|
|
77
|
+
f"Comparand needs to be a scalar with {condition['comparator']} comparator"
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
@staticmethod
|
|
81
|
+
def create(input_config: dict, stage=None, model_dir: str = os.getcwd()) -> Guard:
|
|
82
|
+
config = guard_trafaret.check(input_config)
|
|
83
|
+
|
|
84
|
+
GuardFactory._perform_post_validation_checks(config)
|
|
85
|
+
|
|
86
|
+
if config["type"] == GuardType.MODEL:
|
|
87
|
+
guard = ModelGuard(config, stage)
|
|
88
|
+
elif config["type"] == GuardType.OOTB:
|
|
89
|
+
if config["ootb_type"] == OOTBType.FAITHFULNESS:
|
|
90
|
+
guard = OOTBFaithfulnessGuard(config, stage)
|
|
91
|
+
elif config["ootb_type"] == OOTBType.COST:
|
|
92
|
+
guard = OOTBCostMetric(config, stage)
|
|
93
|
+
elif config["ootb_type"] == OOTBType.AGENT_GOAL_ACCURACY:
|
|
94
|
+
guard = OOTBAgentGoalAccuracyGuard(config, stage)
|
|
95
|
+
elif config["ootb_type"] == OOTBType.TASK_ADHERENCE:
|
|
96
|
+
guard = OOTBTaskAdherenceGuard(config, stage)
|
|
97
|
+
elif config["ootb_type"] == OOTBType.GUIDELINE_ADHERENCE:
|
|
98
|
+
guard = OOTBAgentGuidelineAdherence(config, stage)
|
|
99
|
+
else:
|
|
100
|
+
guard = OOTBGuard(config, stage)
|
|
101
|
+
elif config["type"] == GuardType.NEMO_GUARDRAILS:
|
|
102
|
+
guard = NeMoGuard(config, stage, model_dir)
|
|
103
|
+
elif config["type"] == GuardType.NEMO_EVALUATOR:
|
|
104
|
+
match config["nemo_evaluator_type"]:
|
|
105
|
+
case NemoEvaluatorType.LLM_JUDGE:
|
|
106
|
+
guard = NeMoLLMJudgeGuard(config, stage)
|
|
107
|
+
case NemoEvaluatorType.CONTEXT_RELEVANCE:
|
|
108
|
+
guard = NeMoContextRelevanceGuard(config, stage)
|
|
109
|
+
case NemoEvaluatorType.RESPONSE_GROUNDEDNESS:
|
|
110
|
+
guard = NeMoResponseGroundednessGuard(config, stage)
|
|
111
|
+
case NemoEvaluatorType.TOPIC_ADHERENCE:
|
|
112
|
+
guard = NeMoTopicAdherenceGuard(config, stage)
|
|
113
|
+
case NemoEvaluatorType.AGENT_GOAL_ACCURACY:
|
|
114
|
+
guard = NeMoAgentGoalAccuracyGuard(config, stage)
|
|
115
|
+
case NemoEvaluatorType.RESPONSE_RELEVANCY:
|
|
116
|
+
guard = NeMoResponseRelevancyGuard(config, stage)
|
|
117
|
+
case NemoEvaluatorType.FAITHFULNESS:
|
|
118
|
+
guard = NeMoFaithfulnessGuard(config, stage)
|
|
119
|
+
case _:
|
|
120
|
+
raise ValueError(
|
|
121
|
+
f"Invalid guard type: {config['type']} - {config['nemo_evaluator_type']}"
|
|
122
|
+
)
|
|
123
|
+
else:
|
|
124
|
+
raise ValueError(f"Invalid guard type: {config['type']}")
|
|
125
|
+
|
|
126
|
+
return guard
|
datarobot_dome/guard_helpers.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
3
|
# Last updated 2025.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
|
@@ -27,6 +27,7 @@ from langchain_nvidia_ai_endpoints._statics import determine_model
|
|
|
27
27
|
from langchain_openai import AzureChatOpenAI
|
|
28
28
|
from langchain_openai import ChatOpenAI
|
|
29
29
|
from llama_index.core.evaluation import FaithfulnessEvaluator
|
|
30
|
+
from llama_index.core.evaluation import GuidelineEvaluator
|
|
30
31
|
from ragas import MultiTurnSample
|
|
31
32
|
from ragas.messages import AIMessage
|
|
32
33
|
from ragas.messages import HumanMessage
|
|
@@ -434,6 +435,20 @@ def calculate_agent_goal_accuracy(
|
|
|
434
435
|
return scorer.multi_turn_score(sample)
|
|
435
436
|
|
|
436
437
|
|
|
438
|
+
def calculate_agent_guideline_adherence(
|
|
439
|
+
scorer: GuidelineEvaluator,
|
|
440
|
+
prompt: str,
|
|
441
|
+
response: str,
|
|
442
|
+
citations: list[str] | None = None,
|
|
443
|
+
):
|
|
444
|
+
evaluation_results = scorer.evaluate(
|
|
445
|
+
query=prompt,
|
|
446
|
+
context=citations,
|
|
447
|
+
response=response,
|
|
448
|
+
)
|
|
449
|
+
return evaluation_results.passing
|
|
450
|
+
|
|
451
|
+
|
|
437
452
|
class ModerationDeepEvalLLM(DeepEvalBaseLLM):
|
|
438
453
|
def __init__(self, llm, *args, **kwargs):
|
|
439
454
|
self.llm = llm
|
datarobot_dome/guards/__init__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# ---------------------------------------------------------------------------------
|
|
2
|
-
# Copyright (c)
|
|
2
|
+
# Copyright (c) 2026 DataRobot, Inc. and its affiliates. All rights reserved.
|
|
3
3
|
# Last updated 2025.
|
|
4
4
|
#
|
|
5
5
|
# DataRobot, Inc. Confidential.
|
|
@@ -9,3 +9,18 @@
|
|
|
9
9
|
# For details, see
|
|
10
10
|
# https://www.datarobot.com/wp-content/uploads/2021/07/DataRobot-Tool-and-Utility-Agreement.pdf.
|
|
11
11
|
# ---------------------------------------------------------------------------------
|
|
12
|
+
from .model_guard import ModelGuard
|
|
13
|
+
from .nemo_evaluator import NeMoAgentGoalAccuracyGuard
|
|
14
|
+
from .nemo_evaluator import NeMoContextRelevanceGuard
|
|
15
|
+
from .nemo_evaluator import NeMoEvaluatorGuard
|
|
16
|
+
from .nemo_evaluator import NeMoFaithfulnessGuard
|
|
17
|
+
from .nemo_evaluator import NeMoLLMJudgeGuard
|
|
18
|
+
from .nemo_evaluator import NeMoResponseGroundednessGuard
|
|
19
|
+
from .nemo_evaluator import NeMoResponseRelevancyGuard
|
|
20
|
+
from .nemo_evaluator import NeMoTopicAdherenceGuard
|
|
21
|
+
from .nemo_guard import NeMoGuard
|
|
22
|
+
from .ootb_guard import OOTBAgentGoalAccuracyGuard
|
|
23
|
+
from .ootb_guard import OOTBCostMetric
|
|
24
|
+
from .ootb_guard import OOTBFaithfulnessGuard
|
|
25
|
+
from .ootb_guard import OOTBGuard
|
|
26
|
+
from .ootb_guard import OOTBTaskAdherenceGuard
|