PyPI - datarobot-moderations - Versions diffs - 11.2.3__py3-none-any.whl → 11.2.4__py3-none-any.whl - Mend

datarobot-moderations 11.2.3__py3-none-any.whl → 11.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

datarobot_dome/async_http_client.py CHANGED Viewed

@@ -28,6 +28,7 @@ from datarobot_dome.constants import DATAROBOT_CONFIGURED_ON_PREM_ST_SAAS_URL
 from datarobot_dome.constants import DATAROBOT_SERVERLESS_PLATFORM
 from datarobot_dome.constants import DEFAULT_GUARD_PREDICTION_TIMEOUT_IN_SEC
 from datarobot_dome.constants import LOGGER_NAME_PREFIX
+from datarobot_dome.constants import MODERATIONS_USER_AGENT
 from datarobot_dome.constants import RETRY_COUNT
 from datarobot_dome.constants import ModerationEventTypes
@@ -81,11 +82,13 @@ class AsyncHTTPClient:
             "Content-Type": "text/csv",
             "Accept": "text/csv",
             "Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}",
+            "User-Agent": MODERATIONS_USER_AGENT,
         }
         self.json_headers = {
             "Content-Type": "application/json",
             "Accept": "application/json",
             "Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}",
+            "User-Agent": MODERATIONS_USER_AGENT,
         }
         self.session = None
         self.events_url = f"{os.environ['DATAROBOT_ENDPOINT']}/remoteEvents/"
@@ -100,9 +103,9 @@ class AsyncHTTPClient:
                 asyncio.set_event_loop(self.loop)
             else:
                 raise
+        nest_asyncio.apply(loop=self.loop)
         self.loop.run_until_complete(self.__create_client_session(timeout))
         self.loop.set_debug(True)
-        nest_asyncio.apply(loop=self.loop)
         atexit.register(self.shutdown)

datarobot_dome/constants.py CHANGED Viewed

@@ -10,9 +10,12 @@
 #  https://www.datarobot.com/wp-content/uploads/2021/07/DataRobot-Tool-and-Utility-Agreement.pdf.
 #  ---------------------------------------------------------------------------------
 from enum import Enum
+from importlib.metadata import version
 __GUARD_ASSOCIATION_IDS_COLUMN_NAME__ = "datarobot_guard_association_id"
+MODERATIONS_USER_AGENT = f"datarobot-moderations:{version('datarobot-moderations')}"
 LOGGER_NAME_PREFIX = "moderations"
 DEFAULT_PROMPT_COLUMN_NAME = "promptText"
@@ -71,6 +74,8 @@ PROMPT_TOKEN_COUNT_COLUMN_NAME_FROM_USAGE = "prompt_token_count_from_usage"
 RESPONSE_TOKEN_COUNT_COLUMN_NAME_FROM_USAGE = "response_token_count_from_usage"
 SPAN_PREFIX = "datarobot.guard"
+DATAROBOT_EXTRA_BODY_PREFIX = "datarobot_"
+DATAROBOT_ASSOCIATION_ID_FIELD_NAME = "datarobot_association_id"
 class TargetType(str, Enum):

datarobot_dome/drum_integration.py CHANGED Viewed

@@ -24,6 +24,7 @@ import numpy as np
 import pandas as pd
 import yaml
 from openai.types.chat import ChatCompletionChunk
+from openai.types.chat import CompletionCreateParams
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion import Choice
 from openai.types.chat.chat_completion_message import ChatCompletionMessage
@@ -40,6 +41,8 @@ from datarobot_dome.chat_helper import run_postscore_guards
 from datarobot_dome.constants import AGENTIC_PIPELINE_INTERACTIONS_ATTR
 from datarobot_dome.constants import CHAT_COMPLETION_OBJECT
 from datarobot_dome.constants import CITATIONS_ATTR
+from datarobot_dome.constants import DATAROBOT_ASSOCIATION_ID_FIELD_NAME
+from datarobot_dome.constants import DATAROBOT_EXTRA_BODY_PREFIX
 from datarobot_dome.constants import DATAROBOT_MODERATIONS_ATTR
 from datarobot_dome.constants import DISABLE_MODERATION_RUNTIME_PARAM_NAME
 from datarobot_dome.constants import LLM_BLUEPRINT_ID_ATTR
@@ -590,6 +593,29 @@ def _set_moderation_attribute_to_completion(pipeline, chat_completion, df, assoc
 def get_chat_prompt(completion_create_params):
+    """
+    Validate and extract the user prompt from completion create parameters (CCP).
+    Include tool calls if they were provided.
+    CCP "messages" list must be non-empty and include content with "user" role.
+    Example: "messages": [{"role": "user", "content": "What is the meaning of life?"}]
+    :param completion_create_params: dict containing chat request
+    :return: constructed prompt based on CCP content.
+    :raise ValueError if completion create parameters is not valid.
+    """
+    # ensure message content exists
+    if (
+        "messages" not in completion_create_params
+        or completion_create_params["messages"] is None
+        or len(completion_create_params["messages"]) == 0
+        or not isinstance(completion_create_params["messages"][-1], dict)
+        or "content" not in completion_create_params["messages"][-1]
+    ):
+        raise ValueError(
+            f"Chat input for moderation does not contain a message: {completion_create_params}"
+        )
     # Get the prompt with role = User
     last_user_message = None
     tool_calls = []
@@ -599,7 +625,7 @@ def get_chat_prompt(completion_create_params):
         if message["role"] == "tool":
             tool_calls.append(f"{message.get('name', '')}_{message['content']}")
     if last_user_message is None:
-        raise Exception("No message with 'user' role found in input")
+        raise ValueError("No message with 'user' role found in input")
     prompt_content = last_user_message["content"]
     tool_names = []
@@ -623,7 +649,7 @@ def get_chat_prompt(completion_create_params):
             concatenated_prompt.append(message)
         chat_prompt = "\n".join(concatenated_prompt)
     else:
-        raise Exception(f"Unhandled prompt type: {type(prompt_content)}")
+        raise ValueError(f"Unhandled prompt type: {type(prompt_content)}")
     if len(tool_calls) > 0:
         # Lets not add tool names if tool calls are present.  Tool calls are more
@@ -693,32 +719,81 @@ def report_otel_evaluation_set_metric(pipeline, result_df):
     current_span.set_attribute("datarobot.moderation.evaluation", json.dumps(final_value))
+def filter_extra_body(
+    completion_create_params: CompletionCreateParams,
+) -> tuple[CompletionCreateParams, list]:
+    """
+    completion_create_params is a typed dict of a few standard fields,
+    and arbitrary fields from extra_body.
+    For all fields matching "datarobot_", copy them to a list for later use, and remove them
+    from completion_create_params.
+    :param completion_create_params: the chat completion params from OpenAI client via DRUM
+    :return: filtered completion_create_params and list of "datarobot_" fields
+    """
+    datarobot_extra_body_params = []
+    our_param_names = [
+        p for p in completion_create_params if p.startswith(DATAROBOT_EXTRA_BODY_PREFIX)
+    ]
+    for name in our_param_names:
+        value = completion_create_params[name]
+        datarobot_extra_body_params.append({name: value})
+        _logger.debug("found DataRobot parameter in extra_body: %s", f"{name}={value}")
+        completion_create_params.pop(name, None)
+    return completion_create_params, datarobot_extra_body_params
+def filter_association_id(
+    completion_create_params: CompletionCreateParams,
+) -> tuple[CompletionCreateParams, str | None]:
+    """
+    completion_create_params (CCP) is a typed dict of a few standard fields,
+    and arbitrary fields from extra_body.
+    If a field for the association ID exists, extract that value and remove it from the CCP.
+    Do this before calling filter_extra_body(), which would otherwise capture the association ID.
+    :param completion_create_params: the chat completion params from OpenAI client via DRUM
+    :return: filtered completion_create_params, association ID value
+    If no association ID was found in extra body: return original CCP,None
+    """
+    name = DATAROBOT_ASSOCIATION_ID_FIELD_NAME
+    if name in completion_create_params:
+        value = completion_create_params[name]
+        _logger.debug("found association ID in extra_body: %s", f"{name}={value}")
+        completion_create_params.pop(name, None)
+        return completion_create_params, value
+    return completion_create_params, None
 def guard_chat_wrapper(
     completion_create_params, model, pipeline, drum_chat_fn, association_id=None, **kwargs
 ):
+    # if association ID was included in extra_body, extract field name and value
+    completion_create_params, eb_assoc_id_value = filter_association_id(completion_create_params)
+    # todo future: filter extra_body params here; pass to pipeline.report_custom_metrics
+    # completion_create_params, chat_extra_body_params = filter_extra_body(completion_create_params)
     pipeline.get_new_metrics_payload()
+    # the chat request is not a dataframe, but we'll build a DF internally for moderation.
     prompt_column_name = pipeline.get_input_column(GuardStage.PROMPT)
-    if (
-        "messages" not in completion_create_params
-        or completion_create_params["messages"] is None
-        or len(completion_create_params["messages"]) == 0
-        or not isinstance(completion_create_params["messages"][-1], dict)
-        or "content" not in completion_create_params["messages"][-1]
-    ):
-        raise ValueError(f"Invalid chat input for moderation: {completion_create_params}")
     prompt = get_chat_prompt(completion_create_params)
     streaming_response_requested = completion_create_params.get("stream", False)
     data = pd.DataFrame({prompt_column_name: [prompt]})
+    # for association IDs (with or without extra_body): the column must be defined in the deployment
+    # (here, this means pipeline.get_association_id_column_name() ("standard name") is not empty.)
+    # there are 3 likely cases for association ID, and 1 corner case:
+    # 1. ID value not provided (drum or extra_body) => no association ID column
+    # 2. ID value provided by DRUM => new DF column with standard name and provided value
+    # 3. ID defined in extra_body => new DF column with standard name and extra_body value
+    # 4. ID in extra_body with empty value => no association ID column
+    # Moderation library no longer auto-generates an association ID for chat. However, DRUM does.
     association_id_column_name = pipeline.get_association_id_column_name()
+    association_id = eb_assoc_id_value or association_id
     if association_id_column_name:
         if association_id:
             data[association_id_column_name] = [association_id]
-        elif pipeline.auto_generate_association_ids:
-            data[association_id_column_name] = pipeline.generate_association_ids(1)
-            association_id = data[association_id_column_name].tolist()[0]
     # ==================================================================
     # Step 1: Prescore Guards processing
@@ -729,6 +804,11 @@ def guard_chat_wrapper(
     _logger.debug(filtered_df)
     _logger.debug(f"Pre Score Guard Latency: {prescore_latency} sec")
+    # todo future: add extra_body parameters to custom metrics reporting
+    # _logger.debug("Add extra_body params as custom metrics")
+    # for param in chat_extra_body_params:
+    #     _logger.debug(f"Future: add extra_body param: {param}")
     blocked_prompt_column_name = f"blocked_{prompt_column_name}"
     if prescore_df.loc[0, blocked_prompt_column_name]:
         pipeline.report_custom_metrics(prescore_df)
@@ -890,7 +970,11 @@ def init(model_dir: str = os.getcwd()):
 class ModerationPipeline:
-    """Base class to simplify interactions with DRUM."""
+    """
+    Base class to simplify interactions with DRUM.
+    This class is not used outside of testing;
+    moderation_pipeline_factory() will select the LLM or VDB subclass instead.
+    """
     def score(self, input_df: pd.DataFrame, model, drum_score_fn, **kwargs):
         """Default score function just runs the DRUM score function."""
@@ -898,7 +982,7 @@ class ModerationPipeline:
     def chat(
         self,
-        completion_create_params: pd.DataFrame,
+        completion_create_params: CompletionCreateParams,
         model,
         drum_chat_fn,
         association_id: str = None,
@@ -920,7 +1004,7 @@ class LlmModerationPipeline(ModerationPipeline):
     def chat(
         self,
-        completion_create_params: pd.DataFrame,
+        completion_create_params: CompletionCreateParams,
         model,
         drum_chat_fn,
         association_id=None,
@@ -949,6 +1033,14 @@ class VdbModerationPipeline(ModerationPipeline):
 def moderation_pipeline_factory(
     target_type: str, model_dir: str = os.getcwd()
 ) -> Optional[ModerationPipeline]:
+    """
+    Create and return a moderation pipeline based on model target type.
+    This function is the main integration point with DRUM;
+    called by DRUM's PythonModelAdapter._load_moderation_hooks.
+    :param target_type: usually textgen, agentic, or vdb
+    :param model_dir:
+    :return:
+    """
     # Disable ragas and deepeval tracking while loading the module.
     os.environ["RAGAS_DO_NOT_TRACK"] = "true"
     os.environ["DEEPEVAL_TELEMETRY_OPT_OUT"] = "YES"

datarobot_dome/pipeline/llm_pipeline.py CHANGED Viewed

@@ -113,7 +113,7 @@ class LLMPipeline(Pipeline):
         self._custom_model_dir = os.path.dirname(guards_config_filename)
         self._modifier_guard_seen = {stage: None for stage in GuardStage.ALL}
-        self.auto_generate_association_ids = False
+        self.auto_generate_association_ids = False  # used for score, but not used for chat
         # Dictionary of async http clients per process - its important to maintain
         # this when moderation is running with CUSTOM_MODEL_WORKERS > 1

datarobot_dome/pipeline/pipeline.py CHANGED Viewed

@@ -29,6 +29,7 @@ from datarobot.models.deployment import CustomMetric
 from datarobot_dome.async_http_client import AsyncHTTPClient
 from datarobot_dome.constants import DEFAULT_GUARD_PREDICTION_TIMEOUT_IN_SEC
 from datarobot_dome.constants import LOGGER_NAME_PREFIX
+from datarobot_dome.constants import MODERATIONS_USER_AGENT
 from datarobot_dome.constants import ModerationEventTypes
 CUSTOM_METRICS_BULK_UPLOAD_API_PREFIX = "deployments"
@@ -81,6 +82,7 @@ class Pipeline:
         self._headers = {
             "Content-Type": "application/json",
             "Authorization": f"Bearer {self._datarobot_api_token}",
+            "User-Agent": MODERATIONS_USER_AGENT,
         }
     def _query_self_deployment(self):

{datarobot_moderations-11.2.3.dist-info → datarobot_moderations-11.2.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: datarobot-moderations
-Version: 11.2.3
+Version: 11.2.4
 Summary: DataRobot Monitoring and Moderation framework
 License: DataRobot Tool and Utility Agreement
 Author: DataRobot

{datarobot_moderations-11.2.3.dist-info → datarobot_moderations-11.2.4.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
 datarobot_dome/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,583
-datarobot_dome/async_http_client.py,sha256=wkB4irwvnchNGzO1bk2C_HWM-GOSB3AUn5TXKl-X0ZI,9649
+datarobot_dome/async_http_client.py,sha256=cQFoSI2ovt0Kyk4XWQPXod5PAfA-ZPkjLYVWQZhDGDE,9809
 datarobot_dome/chat_helper.py,sha256=BzvtUyZSZxzOqq-5a2wQKhHhr2kMlcP1MFrHaDAeD_o,9671
-datarobot_dome/constants.py,sha256=vM2_JkXbn4dkWARCqxNfLriSo0E05LDXVrwNktptpuc,10416
-datarobot_dome/drum_integration.py,sha256=BnhAP-D4AaEeh4ferZ-qXnORuWQzYzw9qKAZUTZZnJU,40542
+datarobot_dome/constants.py,sha256=EtdmYdEp9H2awbJVo2Xfmk5PFCJ0nymMSAPIAt8pQgE,10649
+datarobot_dome/drum_integration.py,sha256=nLENtjQEP4nwwyrtesQTj2844I-ap_HwHKvijfxz0Ng,45121
 datarobot_dome/guard.py,sha256=xJds9hcbUaS-KD5nC1mn0GiPdBrileFUu6BuTAjDNuY,34668
 datarobot_dome/guard_executor.py,sha256=ox5_jOHcqMaxaaagIYJJHhCwEI7Wg-rUEiu5rutsfVU,35363
 datarobot_dome/guard_helpers.py,sha256=jfu8JTWCcxu4WD1MKxeP1n53DeebY3SSuP-t5sWyV1U,17187
@@ -14,11 +14,11 @@ datarobot_dome/metrics/citation_metrics.py,sha256=l2mnV1gz7nQeJ_yfaS4dcP3DFWf0p5
 datarobot_dome/metrics/factory.py,sha256=7caa8paI9LuFXDgguXdC4on28V7IwwIsKJT2Z-Aps8A,2187
 datarobot_dome/metrics/metric_scorer.py,sha256=uJ_IJRw7ZFHueg8xjsaXbt0ypO7JiydZ0WapCp96yng,2540
 datarobot_dome/pipeline/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,583
-datarobot_dome/pipeline/llm_pipeline.py,sha256=DMZ4gu88MiSSEQtshDyHOzT3R2Seuf8UqZ7A36QHj3M,18772
-datarobot_dome/pipeline/pipeline.py,sha256=7UmvrZtNxTGewpgM4cf2oThHPoJSarEU1Dyp7xEsASU,17401
+datarobot_dome/pipeline/llm_pipeline.py,sha256=4Q-DW8lzKdPBDTNgO-wI-Pyl53IRZNJcjJpfE3kiv08,18813
+datarobot_dome/pipeline/pipeline.py,sha256=GM1mmFtk4xm2xmHiFOefno4K38FNjdMfrynpsp6MLX0,17511
 datarobot_dome/pipeline/vdb_pipeline.py,sha256=q3c_Z-hGUqhH6j6n8VpS3wZiBIkWgpRDsBnyJyZhiw4,9855
 datarobot_dome/runtime.py,sha256=FD8wXOweqoQVzbZMh-mucL66xT2kGxPsJUGAcJBgwxw,1468
 datarobot_dome/streaming.py,sha256=DkvKEH0yN0aPEWMTAjMFJB3Kx4iLGdjUMQU1pAplbeg,17751
-datarobot_moderations-11.2.3.dist-info/METADATA,sha256=dzpTYxhAXg-NEm8Rrko8U8qvbQncQoGw93a9ZhWV3jo,4742
-datarobot_moderations-11.2.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-datarobot_moderations-11.2.3.dist-info/RECORD,,
+datarobot_moderations-11.2.4.dist-info/METADATA,sha256=iksvFgFDIQZA7DF0vR6fICFawRl7xcdl0hy_E4QAakg,4742
+datarobot_moderations-11.2.4.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+datarobot_moderations-11.2.4.dist-info/RECORD,,

{datarobot_moderations-11.2.3.dist-info → datarobot_moderations-11.2.4.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 2.1.3
+Generator: poetry-core 2.2.1
 Root-Is-Purelib: true
 Tag: py3-none-any

datarobot-moderations 11.2.3__py3-none-any.whl → 11.2.4__py3-none-any.whl

datarobot-moderations 11.2.3__py3-none-any.whl → 11.2.4__py3-none-any.whl