datarobot-moderations 11.2.3__py3-none-any.whl → 11.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,7 @@ from datarobot_dome.constants import DATAROBOT_CONFIGURED_ON_PREM_ST_SAAS_URL
28
28
  from datarobot_dome.constants import DATAROBOT_SERVERLESS_PLATFORM
29
29
  from datarobot_dome.constants import DEFAULT_GUARD_PREDICTION_TIMEOUT_IN_SEC
30
30
  from datarobot_dome.constants import LOGGER_NAME_PREFIX
31
+ from datarobot_dome.constants import MODERATIONS_USER_AGENT
31
32
  from datarobot_dome.constants import RETRY_COUNT
32
33
  from datarobot_dome.constants import ModerationEventTypes
33
34
 
@@ -81,11 +82,13 @@ class AsyncHTTPClient:
81
82
  "Content-Type": "text/csv",
82
83
  "Accept": "text/csv",
83
84
  "Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}",
85
+ "User-Agent": MODERATIONS_USER_AGENT,
84
86
  }
85
87
  self.json_headers = {
86
88
  "Content-Type": "application/json",
87
89
  "Accept": "application/json",
88
90
  "Authorization": f"Bearer {os.environ['DATAROBOT_API_TOKEN']}",
91
+ "User-Agent": MODERATIONS_USER_AGENT,
89
92
  }
90
93
  self.session = None
91
94
  self.events_url = f"{os.environ['DATAROBOT_ENDPOINT']}/remoteEvents/"
@@ -100,9 +103,9 @@ class AsyncHTTPClient:
100
103
  asyncio.set_event_loop(self.loop)
101
104
  else:
102
105
  raise
106
+ nest_asyncio.apply(loop=self.loop)
103
107
  self.loop.run_until_complete(self.__create_client_session(timeout))
104
108
  self.loop.set_debug(True)
105
- nest_asyncio.apply(loop=self.loop)
106
109
 
107
110
  atexit.register(self.shutdown)
108
111
 
@@ -10,9 +10,12 @@
10
10
  # https://www.datarobot.com/wp-content/uploads/2021/07/DataRobot-Tool-and-Utility-Agreement.pdf.
11
11
  # ---------------------------------------------------------------------------------
12
12
  from enum import Enum
13
+ from importlib.metadata import version
13
14
 
14
15
  __GUARD_ASSOCIATION_IDS_COLUMN_NAME__ = "datarobot_guard_association_id"
15
16
 
17
+ MODERATIONS_USER_AGENT = f"datarobot-moderations:{version('datarobot-moderations')}"
18
+
16
19
  LOGGER_NAME_PREFIX = "moderations"
17
20
 
18
21
  DEFAULT_PROMPT_COLUMN_NAME = "promptText"
@@ -71,6 +74,8 @@ PROMPT_TOKEN_COUNT_COLUMN_NAME_FROM_USAGE = "prompt_token_count_from_usage"
71
74
  RESPONSE_TOKEN_COUNT_COLUMN_NAME_FROM_USAGE = "response_token_count_from_usage"
72
75
 
73
76
  SPAN_PREFIX = "datarobot.guard"
77
+ DATAROBOT_EXTRA_BODY_PREFIX = "datarobot_"
78
+ DATAROBOT_ASSOCIATION_ID_FIELD_NAME = "datarobot_association_id"
74
79
 
75
80
 
76
81
  class TargetType(str, Enum):
@@ -24,6 +24,7 @@ import numpy as np
24
24
  import pandas as pd
25
25
  import yaml
26
26
  from openai.types.chat import ChatCompletionChunk
27
+ from openai.types.chat import CompletionCreateParams
27
28
  from openai.types.chat.chat_completion import ChatCompletion
28
29
  from openai.types.chat.chat_completion import Choice
29
30
  from openai.types.chat.chat_completion_message import ChatCompletionMessage
@@ -40,6 +41,8 @@ from datarobot_dome.chat_helper import run_postscore_guards
40
41
  from datarobot_dome.constants import AGENTIC_PIPELINE_INTERACTIONS_ATTR
41
42
  from datarobot_dome.constants import CHAT_COMPLETION_OBJECT
42
43
  from datarobot_dome.constants import CITATIONS_ATTR
44
+ from datarobot_dome.constants import DATAROBOT_ASSOCIATION_ID_FIELD_NAME
45
+ from datarobot_dome.constants import DATAROBOT_EXTRA_BODY_PREFIX
43
46
  from datarobot_dome.constants import DATAROBOT_MODERATIONS_ATTR
44
47
  from datarobot_dome.constants import DISABLE_MODERATION_RUNTIME_PARAM_NAME
45
48
  from datarobot_dome.constants import LLM_BLUEPRINT_ID_ATTR
@@ -590,6 +593,29 @@ def _set_moderation_attribute_to_completion(pipeline, chat_completion, df, assoc
590
593
 
591
594
 
592
595
  def get_chat_prompt(completion_create_params):
596
+ """
597
+ Validate and extract the user prompt from completion create parameters (CCP).
598
+ Include tool calls if they were provided.
599
+
600
+ CCP "messages" list must be non-empty and include content with "user" role.
601
+ Example: "messages": [{"role": "user", "content": "What is the meaning of life?"}]
602
+
603
+ :param completion_create_params: dict containing chat request
604
+ :return: constructed prompt based on CCP content.
605
+ :raise ValueError if completion create parameters is not valid.
606
+ """
607
+ # ensure message content exists
608
+ if (
609
+ "messages" not in completion_create_params
610
+ or completion_create_params["messages"] is None
611
+ or len(completion_create_params["messages"]) == 0
612
+ or not isinstance(completion_create_params["messages"][-1], dict)
613
+ or "content" not in completion_create_params["messages"][-1]
614
+ ):
615
+ raise ValueError(
616
+ f"Chat input for moderation does not contain a message: {completion_create_params}"
617
+ )
618
+
593
619
  # Get the prompt with role = User
594
620
  last_user_message = None
595
621
  tool_calls = []
@@ -599,7 +625,7 @@ def get_chat_prompt(completion_create_params):
599
625
  if message["role"] == "tool":
600
626
  tool_calls.append(f"{message.get('name', '')}_{message['content']}")
601
627
  if last_user_message is None:
602
- raise Exception("No message with 'user' role found in input")
628
+ raise ValueError("No message with 'user' role found in input")
603
629
 
604
630
  prompt_content = last_user_message["content"]
605
631
  tool_names = []
@@ -623,7 +649,7 @@ def get_chat_prompt(completion_create_params):
623
649
  concatenated_prompt.append(message)
624
650
  chat_prompt = "\n".join(concatenated_prompt)
625
651
  else:
626
- raise Exception(f"Unhandled prompt type: {type(prompt_content)}")
652
+ raise ValueError(f"Unhandled prompt type: {type(prompt_content)}")
627
653
 
628
654
  if len(tool_calls) > 0:
629
655
  # Lets not add tool names if tool calls are present. Tool calls are more
@@ -693,32 +719,81 @@ def report_otel_evaluation_set_metric(pipeline, result_df):
693
719
  current_span.set_attribute("datarobot.moderation.evaluation", json.dumps(final_value))
694
720
 
695
721
 
722
def filter_extra_body(
    completion_create_params: CompletionCreateParams,
) -> tuple[CompletionCreateParams, list]:
    """
    Split DataRobot-specific extra_body fields out of the chat completion params.

    completion_create_params is a typed dict of a few standard fields,
    and arbitrary fields from extra_body.
    Every field whose name starts with DATAROBOT_EXTRA_BODY_PREFIX is removed from
    completion_create_params and collected, in encounter order, as a single-entry
    dict ``{name: value}``.

    Note: completion_create_params is modified in place; the returned mapping is the
    same object that was passed in.

    :param completion_create_params: the chat completion params from OpenAI client via DRUM
    :return: filtered completion_create_params and list of "datarobot_" fields
    """
    # Snapshot the matching names first: removing keys while iterating over the
    # mapping itself would raise "dictionary changed size during iteration".
    our_param_names = [
        p for p in completion_create_params if p.startswith(DATAROBOT_EXTRA_BODY_PREFIX)
    ]
    datarobot_extra_body_params = []
    for name in our_param_names:
        # Single pop instead of a lookup followed by a separate removal.
        value = completion_create_params.pop(name)
        datarobot_extra_body_params.append({name: value})
        # Pass name/value as logging args so formatting only happens when DEBUG is enabled.
        _logger.debug("found DataRobot parameter in extra_body: %s=%s", name, value)
    return completion_create_params, datarobot_extra_body_params
743
+
744
+
745
def filter_association_id(
    completion_create_params: CompletionCreateParams,
) -> tuple[CompletionCreateParams, str | None]:
    """
    Extract the association ID from the chat completion params, if present.

    completion_create_params (CCP) is a typed dict of a few standard fields,
    and arbitrary fields from extra_body.
    If the field named DATAROBOT_ASSOCIATION_ID_FIELD_NAME exists, its value is
    returned and the field is removed from the CCP.
    Do this before calling filter_extra_body(), which would otherwise capture the
    association ID (its name also starts with the "datarobot_" prefix).

    Note: the CCP is modified in place; the returned mapping is the same object
    that was passed in.

    :param completion_create_params: the chat completion params from OpenAI client via DRUM
    :return: filtered completion_create_params, association ID value;
        if no association ID was found in extra body: original CCP, None
    """
    name = DATAROBOT_ASSOCIATION_ID_FIELD_NAME
    if name not in completion_create_params:
        return completion_create_params, None

    # Single pop instead of a lookup followed by a separate removal.
    value = completion_create_params.pop(name)
    # Pass name/value as logging args so formatting only happens when DEBUG is enabled.
    _logger.debug("found association ID in extra_body: %s=%s", name, value)
    return completion_create_params, value
765
+
766
+
696
767
  def guard_chat_wrapper(
697
768
  completion_create_params, model, pipeline, drum_chat_fn, association_id=None, **kwargs
698
769
  ):
770
+ # if association ID was included in extra_body, extract field name and value
771
+ completion_create_params, eb_assoc_id_value = filter_association_id(completion_create_params)
772
+
773
+ # todo future: filter extra_body params here; pass to pipeline.report_custom_metrics
774
+ # completion_create_params, chat_extra_body_params = filter_extra_body(completion_create_params)
775
+
699
776
  pipeline.get_new_metrics_payload()
700
777
 
778
+ # the chat request is not a dataframe, but we'll build a DF internally for moderation.
701
779
  prompt_column_name = pipeline.get_input_column(GuardStage.PROMPT)
702
- if (
703
- "messages" not in completion_create_params
704
- or completion_create_params["messages"] is None
705
- or len(completion_create_params["messages"]) == 0
706
- or not isinstance(completion_create_params["messages"][-1], dict)
707
- or "content" not in completion_create_params["messages"][-1]
708
- ):
709
- raise ValueError(f"Invalid chat input for moderation: {completion_create_params}")
710
-
711
780
  prompt = get_chat_prompt(completion_create_params)
712
781
  streaming_response_requested = completion_create_params.get("stream", False)
713
782
 
714
783
  data = pd.DataFrame({prompt_column_name: [prompt]})
784
+ # for association IDs (with or without extra_body): the column must be defined in the deployment
785
+ # (here, this means pipeline.get_association_id_column_name() ("standard name") is not empty.)
786
+ # there are 3 likely cases for association ID, and 1 corner case:
787
+ # 1. ID value not provided (drum or extra_body) => no association ID column
788
+ # 2. ID value provided by DRUM => new DF column with standard name and provided value
789
+ # 3. ID defined in extra_body => new DF column with standard name and extra_body value
790
+ # 4. ID in extra_body with empty value => no association ID column
791
+ # Moderation library no longer auto-generates an association ID for chat. However, DRUM does.
715
792
  association_id_column_name = pipeline.get_association_id_column_name()
793
+ association_id = eb_assoc_id_value or association_id
716
794
  if association_id_column_name:
717
795
  if association_id:
718
796
  data[association_id_column_name] = [association_id]
719
- elif pipeline.auto_generate_association_ids:
720
- data[association_id_column_name] = pipeline.generate_association_ids(1)
721
- association_id = data[association_id_column_name].tolist()[0]
722
797
 
723
798
  # ==================================================================
724
799
  # Step 1: Prescore Guards processing
@@ -729,6 +804,11 @@ def guard_chat_wrapper(
729
804
  _logger.debug(filtered_df)
730
805
  _logger.debug(f"Pre Score Guard Latency: {prescore_latency} sec")
731
806
 
807
+ # todo future: add extra_body parameters to custom metrics reporting
808
+ # _logger.debug("Add extra_body params as custom metrics")
809
+ # for param in chat_extra_body_params:
810
+ # _logger.debug(f"Future: add extra_body param: {param}")
811
+
732
812
  blocked_prompt_column_name = f"blocked_{prompt_column_name}"
733
813
  if prescore_df.loc[0, blocked_prompt_column_name]:
734
814
  pipeline.report_custom_metrics(prescore_df)
@@ -890,7 +970,11 @@ def init(model_dir: str = os.getcwd()):
890
970
 
891
971
 
892
972
  class ModerationPipeline:
893
- """Base class to simplify interactions with DRUM."""
973
+ """
974
+ Base class to simplify interactions with DRUM.
975
+ This class is not used outside of testing;
976
+ moderation_pipeline_factory() will select the LLM or VDB subclass instead.
977
+ """
894
978
 
895
979
  def score(self, input_df: pd.DataFrame, model, drum_score_fn, **kwargs):
896
980
  """Default score function just runs the DRUM score function."""
@@ -898,7 +982,7 @@ class ModerationPipeline:
898
982
 
899
983
  def chat(
900
984
  self,
901
- completion_create_params: pd.DataFrame,
985
+ completion_create_params: CompletionCreateParams,
902
986
  model,
903
987
  drum_chat_fn,
904
988
  association_id: str = None,
@@ -920,7 +1004,7 @@ class LlmModerationPipeline(ModerationPipeline):
920
1004
 
921
1005
  def chat(
922
1006
  self,
923
- completion_create_params: pd.DataFrame,
1007
+ completion_create_params: CompletionCreateParams,
924
1008
  model,
925
1009
  drum_chat_fn,
926
1010
  association_id=None,
@@ -949,6 +1033,14 @@ class VdbModerationPipeline(ModerationPipeline):
949
1033
  def moderation_pipeline_factory(
950
1034
  target_type: str, model_dir: str = os.getcwd()
951
1035
  ) -> Optional[ModerationPipeline]:
1036
+ """
1037
+ Create and return a moderation pipeline based on model target type.
1038
+ This function is the main integration point with DRUM;
1039
+ called by DRUM's PythonModelAdapter._load_moderation_hooks.
1040
+ :param target_type: usually textgen, agentic, or vdb
1041
+ :param model_dir:
1042
+ :return:
1043
+ """
952
1044
  # Disable ragas and deepeval tracking while loading the module.
953
1045
  os.environ["RAGAS_DO_NOT_TRACK"] = "true"
954
1046
  os.environ["DEEPEVAL_TELEMETRY_OPT_OUT"] = "YES"
@@ -113,7 +113,7 @@ class LLMPipeline(Pipeline):
113
113
  self._custom_model_dir = os.path.dirname(guards_config_filename)
114
114
 
115
115
  self._modifier_guard_seen = {stage: None for stage in GuardStage.ALL}
116
- self.auto_generate_association_ids = False
116
+ self.auto_generate_association_ids = False # used for score, but not used for chat
117
117
 
118
118
  # Dictionary of async http clients per process - its important to maintain
119
119
  # this when moderation is running with CUSTOM_MODEL_WORKERS > 1
@@ -29,6 +29,7 @@ from datarobot.models.deployment import CustomMetric
29
29
  from datarobot_dome.async_http_client import AsyncHTTPClient
30
30
  from datarobot_dome.constants import DEFAULT_GUARD_PREDICTION_TIMEOUT_IN_SEC
31
31
  from datarobot_dome.constants import LOGGER_NAME_PREFIX
32
+ from datarobot_dome.constants import MODERATIONS_USER_AGENT
32
33
  from datarobot_dome.constants import ModerationEventTypes
33
34
 
34
35
  CUSTOM_METRICS_BULK_UPLOAD_API_PREFIX = "deployments"
@@ -81,6 +82,7 @@ class Pipeline:
81
82
  self._headers = {
82
83
  "Content-Type": "application/json",
83
84
  "Authorization": f"Bearer {self._datarobot_api_token}",
85
+ "User-Agent": MODERATIONS_USER_AGENT,
84
86
  }
85
87
 
86
88
  def _query_self_deployment(self):
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: datarobot-moderations
3
- Version: 11.2.3
3
+ Version: 11.2.4
4
4
  Summary: DataRobot Monitoring and Moderation framework
5
5
  License: DataRobot Tool and Utility Agreement
6
6
  Author: DataRobot
@@ -1,8 +1,8 @@
1
1
  datarobot_dome/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,583
2
- datarobot_dome/async_http_client.py,sha256=wkB4irwvnchNGzO1bk2C_HWM-GOSB3AUn5TXKl-X0ZI,9649
2
+ datarobot_dome/async_http_client.py,sha256=cQFoSI2ovt0Kyk4XWQPXod5PAfA-ZPkjLYVWQZhDGDE,9809
3
3
  datarobot_dome/chat_helper.py,sha256=BzvtUyZSZxzOqq-5a2wQKhHhr2kMlcP1MFrHaDAeD_o,9671
4
- datarobot_dome/constants.py,sha256=vM2_JkXbn4dkWARCqxNfLriSo0E05LDXVrwNktptpuc,10416
5
- datarobot_dome/drum_integration.py,sha256=BnhAP-D4AaEeh4ferZ-qXnORuWQzYzw9qKAZUTZZnJU,40542
4
+ datarobot_dome/constants.py,sha256=EtdmYdEp9H2awbJVo2Xfmk5PFCJ0nymMSAPIAt8pQgE,10649
5
+ datarobot_dome/drum_integration.py,sha256=nLENtjQEP4nwwyrtesQTj2844I-ap_HwHKvijfxz0Ng,45121
6
6
  datarobot_dome/guard.py,sha256=xJds9hcbUaS-KD5nC1mn0GiPdBrileFUu6BuTAjDNuY,34668
7
7
  datarobot_dome/guard_executor.py,sha256=ox5_jOHcqMaxaaagIYJJHhCwEI7Wg-rUEiu5rutsfVU,35363
8
8
  datarobot_dome/guard_helpers.py,sha256=jfu8JTWCcxu4WD1MKxeP1n53DeebY3SSuP-t5sWyV1U,17187
@@ -14,11 +14,11 @@ datarobot_dome/metrics/citation_metrics.py,sha256=l2mnV1gz7nQeJ_yfaS4dcP3DFWf0p5
14
14
  datarobot_dome/metrics/factory.py,sha256=7caa8paI9LuFXDgguXdC4on28V7IwwIsKJT2Z-Aps8A,2187
15
15
  datarobot_dome/metrics/metric_scorer.py,sha256=uJ_IJRw7ZFHueg8xjsaXbt0ypO7JiydZ0WapCp96yng,2540
16
16
  datarobot_dome/pipeline/__init__.py,sha256=B5Rx8_CNCNsOpxBbRj27XOXCfRZmvmrAR-NzlzIKnDw,583
17
- datarobot_dome/pipeline/llm_pipeline.py,sha256=DMZ4gu88MiSSEQtshDyHOzT3R2Seuf8UqZ7A36QHj3M,18772
18
- datarobot_dome/pipeline/pipeline.py,sha256=7UmvrZtNxTGewpgM4cf2oThHPoJSarEU1Dyp7xEsASU,17401
17
+ datarobot_dome/pipeline/llm_pipeline.py,sha256=4Q-DW8lzKdPBDTNgO-wI-Pyl53IRZNJcjJpfE3kiv08,18813
18
+ datarobot_dome/pipeline/pipeline.py,sha256=GM1mmFtk4xm2xmHiFOefno4K38FNjdMfrynpsp6MLX0,17511
19
19
  datarobot_dome/pipeline/vdb_pipeline.py,sha256=q3c_Z-hGUqhH6j6n8VpS3wZiBIkWgpRDsBnyJyZhiw4,9855
20
20
  datarobot_dome/runtime.py,sha256=FD8wXOweqoQVzbZMh-mucL66xT2kGxPsJUGAcJBgwxw,1468
21
21
  datarobot_dome/streaming.py,sha256=DkvKEH0yN0aPEWMTAjMFJB3Kx4iLGdjUMQU1pAplbeg,17751
22
- datarobot_moderations-11.2.3.dist-info/METADATA,sha256=dzpTYxhAXg-NEm8Rrko8U8qvbQncQoGw93a9ZhWV3jo,4742
23
- datarobot_moderations-11.2.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
- datarobot_moderations-11.2.3.dist-info/RECORD,,
22
+ datarobot_moderations-11.2.4.dist-info/METADATA,sha256=iksvFgFDIQZA7DF0vR6fICFawRl7xcdl0hy_E4QAakg,4742
23
+ datarobot_moderations-11.2.4.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
24
+ datarobot_moderations-11.2.4.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.3
2
+ Generator: poetry-core 2.2.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any