datarobot-moderations 11.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,977 @@
1
+ # ---------------------------------------------------------------------------------
2
+ # Copyright (c) 2025 DataRobot, Inc. and its affiliates. All rights reserved.
3
+ # Last updated 2025.
4
+ #
5
+ # DataRobot, Inc. Confidential.
6
+ # This is proprietary source code of DataRobot, Inc. and its affiliates.
7
+ #
8
+ # This file and its contents are subject to DataRobot Tool and Utility Agreement.
9
+ # For details, see
10
+ # https://www.datarobot.com/wp-content/uploads/2021/07/DataRobot-Tool-and-Utility-Agreement.pdf.
11
+ # ---------------------------------------------------------------------------------
12
+ import copy
13
+ import itertools
14
+ import json
15
+ import logging
16
+ import os
17
+ import time
18
+ import traceback
19
+ import uuid
20
+ from collections.abc import Iterable
21
+ from typing import Optional
22
+
23
+ import numpy as np
24
+ import pandas as pd
25
+ from openai.types.chat import ChatCompletionChunk
26
+ from openai.types.chat.chat_completion import ChatCompletion
27
+ from openai.types.chat.chat_completion import Choice
28
+ from openai.types.chat.chat_completion_message import ChatCompletionMessage
29
+
30
+ from datarobot_dome.chat_helper import add_citations_to_df
31
+ from datarobot_dome.chat_helper import add_token_count_columns_to_df
32
+ from datarobot_dome.chat_helper import build_moderations_attribute_for_completion
33
+ from datarobot_dome.chat_helper import calculate_token_counts_and_confidence_score
34
+ from datarobot_dome.chat_helper import get_all_citation_columns
35
+ from datarobot_dome.chat_helper import get_response_message_and_finish_reason
36
+ from datarobot_dome.chat_helper import remove_unnecessary_columns
37
+ from datarobot_dome.chat_helper import run_postscore_guards
38
+ from datarobot_dome.constants import AGENTIC_PIPELINE_INTERACTIONS_ATTR
39
+ from datarobot_dome.constants import CHAT_COMPLETION_OBJECT
40
+ from datarobot_dome.constants import CITATIONS_ATTR
41
+ from datarobot_dome.constants import DATAROBOT_MODERATIONS_ATTR
42
+ from datarobot_dome.constants import DISABLE_MODERATION_RUNTIME_PARAM_NAME
43
+ from datarobot_dome.constants import LLM_BLUEPRINT_ID_ATTR
44
+ from datarobot_dome.constants import LLM_CONTEXT_COLUMN_NAME
45
+ from datarobot_dome.constants import LLM_PROVIDER_GUARDS_ATTR
46
+ from datarobot_dome.constants import MODERATION_CONFIG_FILE_NAME
47
+ from datarobot_dome.constants import MODERATION_MODEL_NAME
48
+ from datarobot_dome.constants import NONE_CUSTOM_PY_RESPONSE
49
+ from datarobot_dome.constants import PROMPT_VECTOR_ATTR
50
+ from datarobot_dome.constants import USAGE_ATTR
51
+ from datarobot_dome.constants import GuardStage
52
+ from datarobot_dome.constants import ModerationEventTypes
53
+ from datarobot_dome.constants import TargetType
54
+ from datarobot_dome.guard_executor import AsyncGuardExecutor
55
+ from datarobot_dome.pipeline.llm_pipeline import LLMPipeline
56
+ from datarobot_dome.pipeline.vdb_pipeline import VDBPipeline
57
+ from datarobot_dome.streaming import ModerationIterator
58
+ from datarobot_dome.streaming import StreamingContextBuilder
59
+
60
+ _logger = logging.getLogger("drum_integration")
61
+
62
+
63
+ datarobot_metadata_columns = [
64
+ "datarobot_token_count",
65
+ "datarobot_latency",
66
+ "datarobot_confidence_score",
67
+ ]
68
+
69
+
70
+ def block_citations_if_prompt_blocked(pipeline, result_df):
71
+ # Citations are already copied from postscore_df to result_df; here we only
72
+ # mask out the citations for the blocked prompts.
73
+ if LLM_CONTEXT_COLUMN_NAME not in result_df.columns:
74
+ return
75
+
76
+ prompt_column_name = pipeline.get_input_column(GuardStage.PROMPT)
77
+ blocked_prompt_column_name = f"blocked_{prompt_column_name}"
78
+ for index, row in result_df.iterrows():
79
+ if row[blocked_prompt_column_name]:
80
+ # If the row is blocked, reset its citation context to an empty string
81
+ result_df.loc[index, LLM_CONTEXT_COLUMN_NAME] = ""
82
+
83
+
84
+ def _handle_result_df_error_cases(prompt_column_name, df, latency):
85
+ replaced_message_prompt_column_name = f"replaced_message_{prompt_column_name}"
86
+ moderated_prompt_column_name = f"moderated_{prompt_column_name}"
87
+ replaced_prompt_column_name = f"replaced_{prompt_column_name}"
88
+ for index, row in df.iterrows():
89
+ if row.get(replaced_prompt_column_name):
90
+ df.loc[index, moderated_prompt_column_name] = row[replaced_message_prompt_column_name]
91
+ else:
92
+ df.loc[index, moderated_prompt_column_name] = row[prompt_column_name]
93
+ df["datarobot_latency"] = latency / df.shape[0]
94
+ # No tokens, every prompt is blocked
95
+ df["datarobot_token_count"] = 0
96
+ df["datarobot_confidence_score"] = 0.0
97
+ if prompt_column_name in df.columns:
98
+ df.drop(prompt_column_name, axis=1, inplace=True)
99
+ return df
100
+
101
+
102
+ def run_prescore_guards(pipeline, data):
103
+ """
104
+ Run prescore guards on the input data.
105
+
106
+ Args:
107
+ pipeline: Guard Pipeline
108
+ data: Input dataframe sent for predictions by the user
109
+
110
+ Returns:
111
+ prescore_df: Dataframe with all moderations applied to the input. It has
112
+ all the moderation information in various columns and is required
113
+ to build the final result dataframe (as `prescore_df` argument to
114
+ the method `format_result_df`)
115
+ filtered_df: Dataframe with blocked rows removed. This is the dataframe
116
+ to be used as input for the user's `score` method
117
+ prescore_latency: Latency of executing prescore guards
118
+ """
119
+ prompt_column_name = pipeline.get_input_column(GuardStage.PROMPT)
120
+ blocked_prompt_column_name = f"blocked_{prompt_column_name}"
121
+ replaced_prompt_column_name = f"replaced_{prompt_column_name}"
122
+ replaced_message_prompt_column_name = f"replaced_message_{prompt_column_name}"
123
+
124
+ input_df = data.copy(deep=True)
125
+ if len(pipeline.get_prescore_guards()) == 0:
126
+ input_df[blocked_prompt_column_name] = False
127
+ return input_df, input_df, 0
128
+
129
+ start_time = time.time()
130
+
131
+ try:
132
+ prescore_df, prescore_latency = AsyncGuardExecutor(pipeline).run_guards(
133
+ input_df, pipeline.get_prescore_guards(), GuardStage.PROMPT
134
+ )
135
+ except Exception as e:
136
+ end_time = time.time()
137
+ _logger.error(f"Failed to run prescore guards: {e}")
138
+ _logger.error(traceback.format_exc())
139
+ prescore_df = input_df
140
+ prescore_df[blocked_prompt_column_name] = False
141
+ prescore_latency = end_time - start_time
142
+
143
+ _logger.debug(prescore_df)
144
+ # Filter out the blocked prompts; we will not send those prompts
145
+ # for LLM scoring
146
+ if blocked_prompt_column_name in prescore_df.columns:
147
+ filtered_df = prescore_df[~prescore_df[blocked_prompt_column_name]]
148
+ else:
149
+ filtered_df = prescore_df
150
+
151
+ # Now that the pre-score stage is done, substitute any prompts that were
152
+ # replaced by guards such as the PII guard
153
+ for index, row in filtered_df.iterrows():
154
+ if row.get(replaced_prompt_column_name):
155
+ filtered_df.loc[index, prompt_column_name] = row[replaced_message_prompt_column_name]
156
+
157
+ # `filtered_df` is used to call the user's `score` method, so in the
158
+ # return value we only keep the columns that were present in
159
+ # the original input dataframe. Moderation information should not leak
160
+ # into the filtered_df
161
+ return prescore_df, filtered_df[data.columns], prescore_latency
162
+
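+ # Illustrative note (not executed): the moderation columns added by the prescore
+ # guards follow a "<action>_<column>" naming convention. Assuming the configured
+ # prompt column is named "promptText" (an example name only), prescore_df may
+ # carry columns such as:
+ #
+ #   blocked_promptText            True if a prescore guard blocked the row
+ #   blocked_message_promptText    message configured for blocked prompts
+ #   replaced_promptText           True if a guard (e.g. PII) rewrote the prompt
+ #   replaced_message_promptText   the rewritten prompt text
+ #
+ # filtered_df keeps only the unblocked rows and only the original input columns.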
163
+
164
+ def __add_citation_columns_to_predictions_df(predictions_df):
165
+ if LLM_CONTEXT_COLUMN_NAME not in predictions_df.columns:
166
+ return predictions_df
167
+
168
+ # Remove existing citation columns - currently the playground sends CITATION_*
169
+ # columns explicitly. Let's remove those and generate our own
170
+ citation_columns = get_all_citation_columns(predictions_df)
171
+ if len(citation_columns) > 0:
172
+ predictions_df = predictions_df.drop(columns=citation_columns, axis=1)
173
+ citations_dataframe = pd.DataFrame()
174
+ for row_index, llm_context in enumerate(predictions_df[LLM_CONTEXT_COLUMN_NAME].tolist()):
175
+ docs = json.loads(llm_context)
176
+ d = {}
177
+ for index, doc in enumerate(docs):
178
+ d[f"CITATION_CONTENT_{index}"] = [doc["content"]]
179
+ df = pd.DataFrame.from_dict(d, orient="columns")
180
+ df.index = [row_index]
181
+ # Join it row-wise first
182
+ citations_dataframe = pd.concat([citations_dataframe, df], axis=0)
183
+
184
+ # and then concat it to the original one
185
+ return pd.concat([predictions_df, citations_dataframe], axis=1)
186
+
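+ # Illustrative sketch of the transformation above: if the LLM context column for
+ # a row holds the JSON string '[{"content": "doc A"}, {"content": "doc B"}]',
+ # the row gains two generated columns:
+ #
+ #   CITATION_CONTENT_0 = "doc A"
+ #   CITATION_CONTENT_1 = "doc B"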
187
+
188
+ def run_user_score_function(filtered_df, model, pipeline, drum_score_fn, **kwargs):
189
+ """
190
+ A wrapper to execute the user's `score` method. The wrapper measures the
191
+ latency of the `score` method and handles any exceptional conditions
192
+
193
+ Args:
194
+ filtered_df: Input DataFrame to execute `score` on. In the presence of
195
+ prescore guards, it should be `filtered_df` returned by the method
196
+ `run_prescore_guards`. Otherwise, it is an input dataframe received
197
+ from the user
198
+ model: Model object as passed by DRUM
199
+ pipeline: Guard Pipeline
200
+ drum_score_fn: The `score` method to execute
201
+ **kwargs:
202
+
203
+ Returns:
204
+ predictions_df: DataFrame obtained as a return value from user's `score`
205
+ method
206
+ score_latency: Latency to execute user's `score` method
207
+ """
208
+ response_column_name = pipeline.get_input_column(GuardStage.RESPONSE)
209
+ start_time = time.time()
210
+
211
+ try:
212
+ predictions_df = drum_score_fn(filtered_df, model, **kwargs)
213
+ except Exception as e:
214
+ title = "Failed to execute user score function"
215
+ message = f"Exception: {e}"
216
+ _logger.error(title + " " + message)
217
+ pd.set_option("display.max_columns", None)
218
+ _logger.error(filtered_df)
219
+ pipeline.send_event_sync(
220
+ title, message, ModerationEventTypes.MODERATION_MODEL_SCORING_ERROR
221
+ )
222
+ raise
223
+
224
+ if response_column_name not in predictions_df.columns:
225
+ title = "Cannot execute postscore guards"
226
+ message = (
227
+ "Missing response column in predictions df, can't run postscore guards - "
228
+ f"Columns received: {predictions_df.columns}, "
229
+ f"Response column expected: {response_column_name}"
230
+ )
231
+ _logger.error(message)
232
+ pipeline.send_event_sync(
233
+ title, message, ModerationEventTypes.MODERATION_MODEL_SCORING_ERROR
234
+ )
235
+ pd.set_option("display.max_columns", None)
236
+ _logger.error(predictions_df)
237
+ raise Exception(
238
+ f"Response column name {response_column_name} is missing in "
239
+ "the predictions df returned by custom.py"
240
+ )
241
+
242
+ # Temporarily add citation columns to predictions_df
243
+ predictions_df = __add_citation_columns_to_predictions_df(predictions_df)
244
+ # Because the 'score' function's output index is not the same as the filtered
245
+ # data's index, we need to align the indexes first
246
+ predictions_df.index = filtered_df.index
247
+ none_predictions_df = predictions_df[predictions_df[response_column_name].isnull()]
248
+ valid_predictions_df = predictions_df[predictions_df[response_column_name].notnull()]
249
+ end_time = time.time()
250
+ score_latency = end_time - start_time
251
+ pipeline.report_score_latency(score_latency)
252
+ return valid_predictions_df, none_predictions_df, score_latency
253
+
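+ # Minimal sketch of the custom.py `score` hook this wrapper invokes. The column
+ # names and the `my_llm` helper are illustrative assumptions; the real response
+ # column name comes from the pipeline configuration:
+ #
+ #   def score(data, model, **kwargs):
+ #       completions = [my_llm(prompt) for prompt in data["promptText"]]
+ #       return pd.DataFrame({"completion": completions})
+ #
+ # The returned dataframe must contain the response column; rows whose response
+ # is null are split into none_predictions_df and handled separately downstream.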
254
+
255
+ def _auto_generate_association_ids(num_rows):
256
+ _logger.info(f"Auto generating {num_rows} association ids")
257
+ return [str(uuid.uuid4()) for _ in range(num_rows)]
258
+
259
+
260
+ def guard_score_wrapper(data, model, pipeline, drum_score_fn, **kwargs):
261
+ """
262
+ Score wrapper function provided by the moderation library. DRUM will invoke this
263
+ function with the user's score function. The wrapper executes the following steps:
264
+
265
+ 1. Run prescore guards
266
+ 2. Execute user's `score` method
267
+ 3. Run postscore guards
268
+ 4. Assemble the result dataframe using output from steps 1 to 3
269
+ 5. Perform additional metadata calculations (e.g. token counts, confidence
270
+ score, etc.)
271
+
272
+ Args:
273
+ data: Input dataframe sent for predictions by the user
274
+ model: Model object as passed by DRUM
275
+ pipeline: Guard Pipeline (initialized in the `init()` call)
276
+ drum_score_fn: User's `score` method
277
+ Returns:
+ result_df: Dataframe with predictions and moderation information combined
278
+ """
279
+ _logger.debug(data)
280
+
281
+ pipeline.get_new_metrics_payload()
282
+ prompt_column_name = pipeline.get_input_column(GuardStage.PROMPT)
283
+ association_id_column_name = pipeline.get_association_id_column_name()
284
+ if (
285
+ association_id_column_name
286
+ and association_id_column_name not in data.columns
287
+ and pipeline.auto_generate_association_ids
288
+ ):
289
+ data[association_id_column_name] = _auto_generate_association_ids(data.shape[0])
290
+
291
+ # ==================================================================
292
+ # Step 1: Prescore Guards processing
293
+ #
294
+ prescore_df, filtered_df, prescore_latency = run_prescore_guards(pipeline, data)
295
+
296
+ _logger.debug("After passing input through pre score guards")
297
+ _logger.debug(filtered_df)
298
+ _logger.debug(f"Pre Score Guard Latency: {prescore_latency} sec")
299
+
300
+ if filtered_df.empty:
301
+ blocked_message_prompt_column_name = f"blocked_message_{prompt_column_name}"
302
+ # If all prompts in the input are blocked, there is no need to
303
+ # run the score function or the postscore guards; simply return
304
+ # the prescore_df
305
+ prescore_df.rename(
306
+ columns={
307
+ blocked_message_prompt_column_name: pipeline.get_input_column(GuardStage.RESPONSE)
308
+ },
309
+ inplace=True,
310
+ )
311
+ pipeline.report_custom_metrics(prescore_df)
312
+ return _handle_result_df_error_cases(prompt_column_name, prescore_df, prescore_latency)
313
+ # ==================================================================
314
+
315
+ # ==================================================================
316
+ # Step 2: custom.py `score` call
317
+ #
318
+ predictions_df, none_predictions_df, score_latency = run_user_score_function(
319
+ filtered_df, model, pipeline, drum_score_fn, **kwargs
320
+ )
321
+ _logger.debug("After invoking user's score function")
322
+ _logger.debug(predictions_df)
323
+
324
+ # Don't lose the association ids if they exist:
325
+ if (
326
+ association_id_column_name
327
+ and association_id_column_name not in predictions_df.columns
328
+ and association_id_column_name in filtered_df.columns
329
+ ):
330
+ predictions_df[association_id_column_name] = filtered_df[association_id_column_name]
331
+ # ==================================================================
332
+
333
+ # ==================================================================
334
+ # Step 3: Postscore Guards processing
335
+ #
336
+ prompt_column_name = pipeline.get_input_column(GuardStage.PROMPT)
337
+ # Required for the faithfulness calculation; we take the prompt from the filtered_df
338
+ # because it will use the replaced prompt if present.
339
+ predictions_df[prompt_column_name] = filtered_df[prompt_column_name]
340
+
341
+ postscore_df, postscore_latency = run_postscore_guards(pipeline, predictions_df)
342
+
343
+ # ==================================================================
344
+ # Step 4: Assemble the result - we need to merge prescore, postscore
345
+ # Dataframes.
346
+ #
347
+ result_df = format_result_df(pipeline, prescore_df, postscore_df, data, none_predictions_df)
348
+
349
+ # ==================================================================
350
+ # Step 5: Additional metadata calculations
351
+ #
352
+ result_df["datarobot_latency"] = (
353
+ score_latency + prescore_latency + postscore_latency
354
+ ) / result_df.shape[0]
355
+
356
+ return result_df
357
+
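+ # Usage sketch (hypothetical wiring; in production DRUM calls this wrapper with
+ # the loaded model and the custom.py `score` function). "promptText" is an
+ # assumed prompt column name:
+ #
+ #   pipeline = init(model_dir="/path/to/model")
+ #   data = pd.DataFrame({"promptText": ["What is DataRobot?"]})
+ #   result_df = guard_score_wrapper(data, model, pipeline, drum_score_fn=score)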
358
+
359
+ def format_result_df(pipeline, prescore_df, postscore_df, data, none_predictions_df=None):
360
+ """
361
+ Build the final response dataframe to be returned to the user, using
362
+ moderation information from prescore and postscore guards as well as
363
+ input dataframe
364
+
365
+ Args:
366
+ pipeline: Guard Pipeline
367
+ prescore_df: `prescore_df` obtained from `run_prescore_guards`
368
+ postscore_df: `postscore_df` obtained from `run_postscore_guards`
369
+ data: Input dataframe sent for predictions by the user
+ none_predictions_df: Optional dataframe of rows for which custom.py returned a null response
370
+
371
+ Returns:
372
+ result_df: Final dataframe with predictions and moderation information
373
+ combined to be returned to the user
374
+
375
+ """
376
+ prompt_column_name = pipeline.get_input_column(GuardStage.PROMPT)
377
+ blocked_prompt_column_name = f"blocked_{prompt_column_name}"
378
+ blocked_message_prompt_column_name = f"blocked_message_{prompt_column_name}"
379
+ response_column_name = pipeline.get_input_column(GuardStage.RESPONSE)
380
+ blocked_completion_column_name = f"blocked_{response_column_name}"
381
+ unmoderated_response_column_name = f"unmoderated_{response_column_name}"
382
+ moderated_prompt_column_name = f"moderated_{prompt_column_name}"
383
+ replaced_prompt_column_name = f"replaced_{prompt_column_name}"
384
+ replaced_message_prompt_column_name = f"replaced_message_{prompt_column_name}"
385
+
386
+ # This is the final result_df to be returned to the user
387
+ result_columns = (
388
+ set(postscore_df.columns)
389
+ .union(set(prescore_df.columns))
390
+ .union(set(datarobot_metadata_columns))
391
+ .union({unmoderated_response_column_name, moderated_prompt_column_name})
392
+ )
393
+ result_df = pd.DataFrame(index=data.index, columns=list(result_columns))
394
+
395
+ # for the blocked prompts, their completion is the blocked message
396
+ # configured by the guard
397
+ for index, row in prescore_df.iterrows():
398
+ if row.get(blocked_prompt_column_name):
399
+ result_df.loc[index, response_column_name] = row[blocked_message_prompt_column_name]
400
+ result_df.loc[index, unmoderated_response_column_name] = np.nan
401
+ elif row.get(replaced_prompt_column_name):
402
+ result_df.loc[index, moderated_prompt_column_name] = row[
403
+ replaced_message_prompt_column_name
404
+ ]
405
+ else:
406
+ result_df.loc[index, moderated_prompt_column_name] = row[prompt_column_name]
407
+ # Copy metric columns from prescore_df - it has the prediction values from
408
+ # the prescore guards, whether a prescore guard blocked the text or not,
409
+ # what action each prescore guard took on that prompt, etc.
410
+ for column in prescore_df.columns:
411
+ result_df.loc[index, column] = row[column]
412
+
413
+ if none_predictions_df is not None and not none_predictions_df.empty:
414
+ for index, row in none_predictions_df.iterrows():
415
+ result_df.loc[index, response_column_name] = NONE_CUSTOM_PY_RESPONSE
416
+ result_df.loc[index, unmoderated_response_column_name] = NONE_CUSTOM_PY_RESPONSE
417
+ result_df.loc[index, blocked_completion_column_name] = False
418
+ for column in none_predictions_df.columns:
419
+ if column != response_column_name:
420
+ result_df.loc[index, column] = row[column]
421
+
422
+ blocked_message_completion_column_name = f"blocked_message_{response_column_name}"
423
+ replaced_response_column_name = f"replaced_{response_column_name}"
424
+ replaced_message_response_column_name = f"replaced_message_{response_column_name}"
425
+ # For the rest of the prompts we did get completions. If a completion is
426
+ # blocked, use the blocked message; otherwise use the completion. Note that even if
427
+ # the PII guard has replaced the completion, it will still be under row['completion']
428
+ for index, row in postscore_df.iterrows():
429
+ if row.get(blocked_completion_column_name):
430
+ result_df.loc[index, response_column_name] = row[blocked_message_completion_column_name]
431
+ elif row.get(replaced_response_column_name):
432
+ result_df.loc[index, response_column_name] = row[replaced_message_response_column_name]
433
+ else:
434
+ result_df.loc[index, response_column_name] = row[response_column_name]
435
+ result_df.loc[index, unmoderated_response_column_name] = row[response_column_name]
436
+ # Similarly, copy metric columns from the postscore df - it has prediction
437
+ # values from the postscore guards, whether a postscore guard blocked or
438
+ # reported the completion, what action each postscore guard took on
439
+ # that completion, citations, etc.
440
+ for column in postscore_df.columns:
441
+ if column != response_column_name:
442
+ result_df.loc[index, column] = row[column]
443
+
444
+ block_citations_if_prompt_blocked(pipeline, result_df)
445
+ calculate_token_counts_and_confidence_score(pipeline, result_df)
446
+
447
+ result_df = remove_unnecessary_columns(pipeline, result_df)
448
+
449
+ # Single call custom metric reporting
450
+ pipeline.report_custom_metrics(result_df)
451
+
452
+ # Also, ensure that result_df does not contain columns from the input df; they create problems
453
+ # during the data export
454
+ for column in data.columns:
455
+ if column in result_df.columns:
456
+ result_df.drop(column, axis=1, inplace=True)
457
+
458
+ _logger.debug("Return df")
459
+ _logger.debug(result_df)
460
+
461
+ return result_df
462
+
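+ # Illustrative shape of result_df, assuming the prompt and response columns are
+ # "promptText" and "completion" (example names only):
+ #
+ #   completion                  final completion (blocked message or replaced text if moderated)
+ #   unmoderated_completion      completion exactly as returned by custom.py (NaN if blocked)
+ #   moderated_promptText        prompt that was actually sent to the LLM
+ #   datarobot_token_count, datarobot_latency, datarobot_confidence_score
+ #   ... plus the per-guard metric columns copied from prescore_df and postscore_df.
+ #
+ # Columns present in the original input dataframe are dropped before returning.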
463
+
464
+ def run_user_chat_function(completion_create_params, model, pipeline, drum_chat_fn, **kwargs):
465
+ """
466
+ A wrapper to execute the user's `chat` method. The wrapper measures the
467
+ latency of the `chat` method and handles any exceptional conditions
468
+
469
+ Args:
470
+ completion_create_params: Prompt with chat history
471
+ model: Model object as passed by DRUM
472
+ pipeline: Guard Pipeline
473
+ drum_chat_fn: The `chat` method to execute
474
+
475
+ Returns:
476
+ chat_completion: ChatCompletion object as returned by the user's chat method
477
+ score_latency: Latency to execute user's `chat` method
478
+ """
479
+ start_time = time.time()
480
+
481
+ try:
482
+ chat_completion = drum_chat_fn(completion_create_params, model, **kwargs)
483
+ except Exception as e:
484
+ _logger.error(f"Failed to execute user chat function: {e}")
485
+ raise
486
+
487
+ end_time = time.time()
488
+ score_latency = end_time - start_time
489
+ pipeline.report_score_latency(score_latency)
490
+
491
+ return chat_completion, score_latency
492
+
493
+
494
+ def build_predictions_df_from_completion(data, pipeline, chat_completion):
495
+ response_column_name = pipeline.get_input_column(GuardStage.RESPONSE)
496
+ predictions_df = data.copy(deep=True)
497
+ if isinstance(chat_completion, ChatCompletion):
498
+ if len(chat_completion.choices) == 0:
499
+ raise ValueError("Invalid response from custom.py, len(choices) = 0")
500
+ predictions_df[response_column_name] = chat_completion.choices[0].message.content
501
+ if getattr(chat_completion, CITATIONS_ATTR, None):
502
+ predictions_df = add_citations_to_df(chat_completion.citations, predictions_df)
503
+ if getattr(chat_completion, USAGE_ATTR, None):
504
+ predictions_df = add_token_count_columns_to_df(
505
+ pipeline, predictions_df, usage=chat_completion.usage
506
+ )
507
+ if pipeline.agentic_metrics_configured():
508
+ pipeline_interactions = getattr(
509
+ chat_completion, AGENTIC_PIPELINE_INTERACTIONS_ATTR, None
510
+ )
511
+ predictions_df[AGENTIC_PIPELINE_INTERACTIONS_ATTR] = pipeline_interactions
512
+
513
+ source_object = chat_completion
514
+ elif isinstance(chat_completion, Iterable):
515
+ # Assemble the chunks into a single message
516
+ messages = []
517
+ last_chunk = None
518
+ for index, chunk in enumerate(chat_completion):
519
+ if not isinstance(chunk, ChatCompletionChunk):
520
+ raise ValueError(
521
+ f"Chunk at index {index} is not of type 'ChatCompletionChunk',"
522
+ f" but is of type '{type(chunk)}'"
523
+ )
524
+ last_chunk = chunk
525
+ if len(chunk.choices) == 0:
526
+ _logger.warning(f"No chunk delta at index {index}, skipping it..")
527
+ continue
528
+ if chunk.choices[0].delta.content:
529
+ # The first chunk's content is '' and the last chunk's content is None;
530
+ # ignore both of those
531
+ messages.append(chunk.choices[0].delta.content)
532
+ predictions_df[response_column_name] = "".join(messages)
533
+ if getattr(last_chunk, CITATIONS_ATTR, None):
534
+ predictions_df = add_citations_to_df(last_chunk.citations, predictions_df)
535
+ source_object = last_chunk
536
+ else:
537
+ raise ValueError(
538
+ "Object returned by custom.py is not of type 'ChatCompletion' or an "
539
+ f"'Iterable[ChatCompletionChunk], but is of type '{type(chat_completion)}'"
540
+ )
541
+
542
+ extra_attributes = {
543
+ attr: getattr(source_object, attr, None)
544
+ for attr in [
545
+ LLM_BLUEPRINT_ID_ATTR,
546
+ LLM_PROVIDER_GUARDS_ATTR,
547
+ PROMPT_VECTOR_ATTR,
548
+ CITATIONS_ATTR,
549
+ USAGE_ATTR,
550
+ ]
551
+ }
552
+ if pipeline.agentic_metrics_configured():
553
+ extra_attributes[AGENTIC_PIPELINE_INTERACTIONS_ATTR] = getattr(
554
+ source_object, AGENTIC_PIPELINE_INTERACTIONS_ATTR, None
555
+ )
556
+ return predictions_df, extra_attributes
557
+
558
+
559
+ def build_non_streaming_chat_completion(message, reason, extra_attributes=None):
560
+ message = ChatCompletionMessage(content=message, role="assistant")
561
+ choice = Choice(finish_reason=reason, index=0, message=message)
562
+ completion = ChatCompletion(
563
+ id=str(uuid.uuid4()),
564
+ choices=[choice],
565
+ created=int(time.time()),
566
+ model=MODERATION_MODEL_NAME,
567
+ object=CHAT_COMPLETION_OBJECT,
568
+ )
569
+ if extra_attributes:
570
+ for attr, attr_value in extra_attributes.items():
571
+ setattr(completion, attr, attr_value)
572
+ return completion
573
+
574
+
575
+ def _set_moderation_attribute_to_completion(pipeline, chat_completion, df, association_id=None):
576
+ if not pipeline.extra_model_output_for_chat_enabled:
577
+ return chat_completion
578
+
579
+ moderations = build_moderations_attribute_for_completion(pipeline, df)
580
+
581
+ if association_id:
582
+ moderations["association_id"] = association_id
583
+ if isinstance(chat_completion, ChatCompletion):
584
+ setattr(chat_completion, DATAROBOT_MODERATIONS_ATTR, moderations)
585
+ else:
586
+ # Attach the extra attribute to the last chunk of the completion
587
+ setattr(chat_completion[-1], DATAROBOT_MODERATIONS_ATTR, moderations)
588
+
589
+ return chat_completion
590
+
591
+
592
+ def get_chat_prompt(completion_create_params):
593
+ # Find the last message with the 'user' role and collect any tool messages
594
+ last_user_message = None
595
+ tool_calls = []
596
+ for message in completion_create_params["messages"]:
597
+ if message["role"] == "user":
598
+ last_user_message = message
599
+ if message["role"] == "tool":
600
+ tool_calls.append(f"{message.get('name', '')}_{message['content']}")
601
+ if last_user_message is None:
602
+ raise Exception("No message with 'user' role found in input")
603
+
604
+ prompt_content = last_user_message["content"]
605
+ tool_names = []
606
+ if "tools" in completion_create_params:
607
+ for tool in completion_create_params["tools"]:
608
+ if "function" in tool and "name" in tool["function"]:
609
+ tool_names.append(tool["function"]["name"])
610
+ if isinstance(prompt_content, str):
611
+ chat_prompt = prompt_content
612
+ elif isinstance(prompt_content, list):
613
+ concatenated_prompt = []
614
+ for content in prompt_content:
615
+ if content["type"] == "text":
616
+ message = content["text"]
617
+ elif content["type"] == "image_url":
618
+ message = f"Image URL: {content['image_url']['url']}"
619
+ elif content["type"] == "input_audio":
620
+ message = f"Audio Input, Format: {content['input_audio']['format']}"
621
+ else:
622
+ message = f"Unhandled content type: {content['type']}"
623
+ concatenated_prompt.append(message)
624
+ chat_prompt = "\n".join(concatenated_prompt)
625
+ else:
626
+ raise Exception(f"Unhandled prompt type: {type(prompt_content)}")
627
+
628
+ if len(tool_calls) > 0:
629
+ # Let's not add tool names if tool calls are present; tool calls are more
630
+ # informative than names
631
+ return "\n".join([chat_prompt, "Tool Calls:", "\n".join(tool_calls)])
632
+
633
+ if len(tool_names) > 0:
634
+ return "\n".join([chat_prompt, "Tool Names:", "\n".join(tool_names)])
635
+
636
+ return chat_prompt
637
+
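+ # Illustrative example of the flattening performed above (URLs and tool names are
+ # made up for the example):
+ #
+ #   params = {
+ #       "messages": [{"role": "user", "content": [
+ #           {"type": "text", "text": "Describe this"},
+ #           {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}},
+ #       ]}],
+ #       "tools": [{"type": "function", "function": {"name": "lookup_weather"}}],
+ #   }
+ #   get_chat_prompt(params)
+ #   # -> "Describe this\nImage URL: https://example.com/img.png\nTool Names:\nlookup_weather"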
638
+
639
+ def _is_llm_requesting_user_tool_call(completion):
640
+ if not completion:
641
+ return False, completion
642
+
643
+ if isinstance(completion, ChatCompletion):
644
+ if not completion.choices or len(completion.choices) == 0:
645
+ return False, completion
646
+ if completion.choices[0].finish_reason == "tool_calls":
647
+ return True, completion
648
+ elif hasattr(completion, "__next__"):
649
+ # 'Peek' into first chunk to see if it is a tool call
650
+ chunk = next(completion)
651
+ # either way, make sure the iterator is preserved by re-attaching the consumed chunk
652
+ completion = itertools.chain([chunk], completion)
653
+ if (
654
+ not isinstance(chunk, ChatCompletionChunk)
655
+ or not chunk.choices
656
+ or len(chunk.choices) == 0
657
+ or not chunk.choices[0].delta
658
+ or not chunk.choices[0].delta.tool_calls
659
+ ):
660
+ return False, completion
661
+ return True, completion
662
+ return False, completion
663
+
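+ # The "peek" above uses a standard itertools pattern: consume one element from the
+ # stream, then stitch it back so downstream consumers still see every chunk, e.g.:
+ #
+ #   stream = iter(chunks)
+ #   first = next(stream)
+ #   stream = itertools.chain([first], stream)  # yields `first`, then the rest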
664
+
665
+ def guard_chat_wrapper(
666
+ completion_create_params, model, pipeline, drum_chat_fn, association_id=None, **kwargs
667
+ ):
668
+ pipeline.get_new_metrics_payload()
669
+
670
+ prompt_column_name = pipeline.get_input_column(GuardStage.PROMPT)
671
+ if (
672
+ "messages" not in completion_create_params
673
+ or completion_create_params["messages"] is None
674
+ or len(completion_create_params["messages"]) == 0
675
+ or not isinstance(completion_create_params["messages"][-1], dict)
676
+ or "content" not in completion_create_params["messages"][-1]
677
+ ):
678
+ raise ValueError(f"Invalid chat input for moderation: {completion_create_params}")
679
+
680
+ prompt = get_chat_prompt(completion_create_params)
681
+ streaming_response_requested = completion_create_params.get("stream", False)
682
+
683
+ data = pd.DataFrame({prompt_column_name: [prompt]})
684
+ association_id_column_name = pipeline.get_association_id_column_name()
685
+ if association_id_column_name:
686
+ if association_id:
687
+ data[association_id_column_name] = [association_id]
688
+ elif pipeline.auto_generate_association_ids:
689
+ data[association_id_column_name] = _auto_generate_association_ids(1)
690
+ association_id = data[association_id_column_name].tolist()[0]
691
+
692
+ # ==================================================================
693
+ # Step 1: Prescore Guards processing
694
+ #
695
+ prescore_df, filtered_df, prescore_latency = run_prescore_guards(pipeline, data)
696
+
697
+ _logger.debug("After passing input through pre score guards")
698
+ _logger.debug(filtered_df)
699
+ _logger.debug(f"Pre Score Guard Latency: {prescore_latency} sec")
700
+
701
+ blocked_prompt_column_name = f"blocked_{prompt_column_name}"
702
+ if prescore_df.loc[0, blocked_prompt_column_name]:
703
+ pipeline.report_custom_metrics(prescore_df)
704
+ blocked_message_prompt_column_name = f"blocked_message_{prompt_column_name}"
705
+ # If the prompt is blocked, neither the chat history nor the prompt
706
+ # should be sent to the LLM
707
+ chat_completion = build_non_streaming_chat_completion(
708
+ prescore_df.loc[0, blocked_message_prompt_column_name],
709
+ "content_filter",
710
+ )
711
+ result_df = _handle_result_df_error_cases(prompt_column_name, prescore_df, prescore_latency)
712
+ if streaming_response_requested:
713
+ streaming_context = (
714
+ StreamingContextBuilder()
715
+ .set_input_df(data)
716
+ .set_prescore_df(result_df)
717
+ .set_prescore_latency(prescore_latency)
718
+ .set_pipeline(pipeline)
719
+ .set_association_id(association_id)
720
+ .build()
721
+ )
722
+ return ModerationIterator(streaming_context, chat_completion)
723
+ else:
724
+ completion = _set_moderation_attribute_to_completion(
725
+ pipeline, chat_completion, result_df, association_id=association_id
726
+ )
727
+ return completion
728
+
729
+ replaced_prompt_column_name = f"replaced_{prompt_column_name}"
730
+ if (
731
+ replaced_prompt_column_name in prescore_df.columns
732
+ and prescore_df.loc[0, replaced_prompt_column_name]
733
+ ):
734
+ # PII kind of guard could have modified the prompt, so use that modified prompt
735
+ # for the user chat function
736
+ _modified_chat = copy.deepcopy(completion_create_params)
737
+ _modified_chat["messages"][-1]["content"] = filtered_df.loc[0, prompt_column_name]
738
+ else:
739
+ # If no modification, use the original input
740
+ _modified_chat = completion_create_params
741
+ # ==================================================================
742
+
743
+ # ==================================================================
744
+ # Step 2: custom.py `chat` call
745
+ #
746
+ chat_completion, score_latency = run_user_chat_function(
747
+ _modified_chat, model, pipeline, drum_chat_fn, **kwargs
748
+ )
749
+ _logger.debug("After invoking user's chat function")
750
+ _logger.debug(chat_completion)
751
+
752
+ # For a tool call, content is None and tool_calls is not empty
753
+ tool_call_request_by_llm, chat_completion = _is_llm_requesting_user_tool_call(chat_completion)
754
+ if tool_call_request_by_llm:
755
+ # Note: There is an opportunity to apply a guard here (is the LLM
756
+ # asking the user to invoke the right call?), but that is left as future work
757
+ return chat_completion
758
+
759
+ if streaming_response_requested:
760
+ streaming_context = (
761
+ StreamingContextBuilder()
762
+ .set_input_df(data)
763
+ .set_prescore_df(prescore_df)
764
+ .set_prescore_latency(prescore_latency)
765
+ .set_pipeline(pipeline)
766
+ .set_association_id(association_id)
767
+ .build()
768
+ )
769
+ return ModerationIterator(streaming_context, chat_completion)
770
+
771
+ # The code flow below handles the case where a non-streaming completion was requested
772
+ # ==================================================================
773
+ # Step 3: Postscore Guards processing
774
+ #
775
+ # The prompt column is already part of `data` and gets included for
776
+ # the faithfulness calculation
777
+ response_column_name = pipeline.get_input_column(GuardStage.RESPONSE)
778
+ predictions_df, extra_attributes = build_predictions_df_from_completion(
779
+ data, pipeline, chat_completion
780
+ )
781
+ response = predictions_df.loc[0, response_column_name]
782
+
783
+ if response is not None:
784
+ none_predictions_df = None
785
+ postscore_df, postscore_latency = run_postscore_guards(pipeline, predictions_df)
786
+ else:
787
+ postscore_df, postscore_latency = pd.DataFrame(), 0
788
+ none_predictions_df = predictions_df
789
+
790
+ # ==================================================================
791
+ # Step 4: Assemble the result - we need to merge prescore, postscore
792
+ # Dataframes.
793
+ #
794
+ result_df = format_result_df(
795
+ pipeline, prescore_df, postscore_df, data, none_predictions_df=none_predictions_df
796
+ )
797
+
798
+ # ==================================================================
799
+ # Step 5: Additional metadata calculations
800
+ #
801
+ result_df["datarobot_latency"] = (
802
+ score_latency + prescore_latency + postscore_latency
803
+ ) / result_df.shape[0]
804
+
805
+ response_message, finish_reason = get_response_message_and_finish_reason(pipeline, postscore_df)
806
+
807
+ final_completion = build_non_streaming_chat_completion(
808
+ response_message, finish_reason, extra_attributes
809
+ )
810
+ return _set_moderation_attribute_to_completion(
811
+ pipeline, final_completion, result_df, association_id=association_id
812
+ )
813
+
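+ # Usage sketch (hypothetical wiring; DRUM invokes this wrapper for chat requests
+ # with the custom.py `chat` function):
+ #
+ #   params = {"messages": [{"role": "user", "content": "Hello"}], "stream": False}
+ #   completion = guard_chat_wrapper(params, model, pipeline, drum_chat_fn=chat)
+ #   completion.choices[0].message.content   # the moderated response text
+ #
+ # With "stream": True the wrapper instead returns a ModerationIterator that wraps
+ # the user's streamed chunks.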
814
+
815
+ def run_vdb_score_function(
816
+ input_df: pd.DataFrame, model, pipeline, drum_score_fn, **kwargs
817
+ ) -> pd.DataFrame:
818
+ """
819
+ A wrapper to execute the VDB `score` method. The wrapper measures the
820
+ latency of the `score` method and handles any exceptional conditions
821
+ Returns:
822
+ predictions_df: DataFrame obtained as a return value from user's `score`
823
+ method
824
+ """
825
+ start_time = time.time()
826
+
827
+ try:
828
+ predictions_df = drum_score_fn(input_df, model, **kwargs)
829
+ except Exception as e:
830
+ title = "Failed to execute vdb score function"
831
+ message = f"Exception: {e}"
832
+ _logger.error(title + " " + message)
833
+ pd.set_option("display.max_columns", None)
834
+ _logger.error(input_df)
835
+ pipeline.send_event_sync(
836
+ title, message, ModerationEventTypes.MODERATION_MODEL_SCORING_ERROR
837
+ )
838
+ raise
839
+
840
+ score_latency = time.time() - start_time
841
+ pipeline.record_score_latency(score_latency)
842
+ return predictions_df
843
+
844
+
845
+ def vdb_score_wrapper(data: pd.DataFrame, model, pipeline: VDBPipeline, drum_score_fn, **kwargs):
846
+ """
847
+ Runs on each prediction request, and takes care of running the "score" function as well
848
+ as collecting the metrics for the VDB pipeline.
849
+ """
850
+ _logger.debug(data)
851
+
852
+ # clear/allocate memory for reporting metrics
853
+ pipeline.get_new_metrics_payload()
854
+
855
+ # NOTE: no "pre-score" calculation on the DataFrame for the predictions
856
+
857
+ # perform the main "score" function for this model
858
+ predictions_df = run_vdb_score_function(data, model, pipeline, drum_score_fn, **kwargs)
859
+
860
+ # loop through all the metrics scoring for the pipeline with predictions_df that has citations
861
+ for scorer in pipeline.scorers():
862
+ value = scorer.score(predictions_df)
863
+ pipeline.record_aggregate_value(scorer.name, value)
864
+
865
+ pipeline.report_custom_metrics()
866
+ return predictions_df
867
+
868
+
869
+ def vdb_init():
870
+ """Initializes a VDB pipeline."""
871
+ return VDBPipeline()
872
+
873
+
874
+ def init(model_dir: str = os.getcwd()):
875
+ """
876
+ Initialize the moderation framework
877
+
878
+ Returns:
879
+ pipeline: A Guard pipeline object required to enforce moderations while
880
+ scoring on user data
881
+ """
882
+ disable_moderation_runtime_value = json.loads(
883
+ os.environ.get(DISABLE_MODERATION_RUNTIME_PARAM_NAME, "{}")
884
+ )
885
+ if (
886
+ "payload" in disable_moderation_runtime_value
887
+ and disable_moderation_runtime_value["payload"]
888
+ ):
889
+ _logger.warning("Moderation is disabled via runtime parameter on the model")
890
+ return None
891
+
892
+ guard_config_file = os.path.join(model_dir, MODERATION_CONFIG_FILE_NAME)
893
+ if not os.path.exists(guard_config_file):
894
+ _logger.warning(
895
+ f"Guard config file: {guard_config_file} not found in the model directory,"
896
+ " moderations will not be enforced on this model"
897
+ )
898
+ return None
899
+ pipeline = LLMPipeline(guard_config_file)
900
+ # Let's export the prompt and response column names for custom.py
901
+ os.environ["PROMPT_COLUMN_NAME"] = pipeline.get_input_column(GuardStage.PROMPT)
902
+ os.environ["RESPONSE_COLUMN_NAME"] = pipeline.get_input_column(GuardStage.RESPONSE)
903
+ return pipeline
904
+
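+ # Inside custom.py the exported variables let the model code stay agnostic of the
+ # configured column names; a minimal sketch (the fallback defaults are illustrative):
+ #
+ #   prompt_col = os.environ.get("PROMPT_COLUMN_NAME", "promptText")
+ #   response_col = os.environ.get("RESPONSE_COLUMN_NAME", "completion")
+ #
+ #   def score(data, model, **kwargs):
+ #       return pd.DataFrame({response_col: data[prompt_col]})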
905
+
906
+ class ModerationPipeline:
907
+ """Base class to simplify interactions with DRUM."""
908
+
909
+ def score(self, input_df: pd.DataFrame, model, drum_score_fn, **kwargs):
910
+ """Default score function just runs the DRUM score function."""
911
+ return drum_score_fn(input_df, model, **kwargs)
912
+
913
+ def chat(
914
+ self,
915
+ completion_create_params: dict,
916
+ model,
917
+ drum_chat_fn,
918
+ association_id: Optional[str] = None,
919
+ **kwargs,
920
+ ):
921
+ """Default chat wrapper function just runs the DRUM chat function."""
922
+ return drum_chat_fn(
923
+ completion_create_params, model, association_id=association_id, **kwargs
924
+ )
925
+
926
+
927
+ class LlmModerationPipeline(ModerationPipeline):
928
+ def __init__(self, pipeline: LLMPipeline):
929
+ self._pipeline = pipeline
930
+
931
+ def score(self, data: pd.DataFrame, model, drum_score_fn, **kwargs):
932
+ """Calls the standard guard score function."""
933
+ return guard_score_wrapper(data, model, self._pipeline, drum_score_fn, **kwargs)
934
+
935
+ def chat(
936
+ self,
937
+ completion_create_params: dict,
938
+ model,
939
+ drum_chat_fn,
940
+ association_id=None,
941
+ **kwargs,
942
+ ):
943
+ """Calls the standard guard chat function."""
944
+ return guard_chat_wrapper(
945
+ completion_create_params,
946
+ model,
947
+ self._pipeline,
948
+ drum_chat_fn,
949
+ association_id=association_id,
950
+ **kwargs,
951
+ )
952
+
953
+
954
+ class VdbModerationPipeline(ModerationPipeline):
955
+ def __init__(self, pipeline: VDBPipeline):
956
+ self._pipeline = pipeline
957
+
958
+ def score(self, data: pd.DataFrame, model, drum_score_fn, **kwargs):
959
+ """Calls the VDB score function."""
960
+ return vdb_score_wrapper(data, model, self._pipeline, drum_score_fn, **kwargs)
961
+
962
+
963
+ def moderation_pipeline_factory(
964
+ target_type: str, model_dir: str = os.getcwd()
965
+ ) -> Optional[ModerationPipeline]:
966
+ if target_type in TargetType.guards():
967
+ pipeline = init(model_dir=model_dir)
968
+ if pipeline:
969
+ return LlmModerationPipeline(pipeline)
970
+
971
+ if target_type in TargetType.vdb():
972
+ pipeline = vdb_init()
973
+ if pipeline:
974
+ return VdbModerationPipeline(pipeline)
975
+
976
+ _logger.warning(f"Unsupported target type: {target_type}")
977
+ return None
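+ # Factory usage sketch (hypothetical caller; the target type string and paths are
+ # assumptions, and DRUM normally performs this selection itself):
+ #
+ #   moderation = moderation_pipeline_factory("textgeneration", model_dir="/opt/model")
+ #   if moderation is not None:
+ #       predictions = moderation.score(input_df, model, drum_score_fn=score)
+ #   else:
+ #       predictions = score(input_df, model)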