rasa-pro 3.12.0.dev4__py3-none-any.whl → 3.12.0.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of rasa-pro might be problematic.
- rasa/cli/dialogue_understanding_test.py +40 -5
- rasa/constants.py +0 -1
- rasa/core/actions/action.py +4 -2
- rasa/core/actions/custom_action_executor.py +1 -1
- rasa/core/channels/inspector/dist/assets/Tableau10-1b767f5e.js +1 -0
- rasa/core/channels/inspector/dist/assets/arc-f0f8bd46.js +1 -0
- rasa/core/channels/inspector/dist/assets/blockDiagram-38ab4fdb-7162c77d.js +118 -0
- rasa/core/channels/inspector/dist/assets/c4Diagram-3d4e48cf-b1d0d098.js +10 -0
- rasa/core/channels/inspector/dist/assets/channel-e265ea59.js +1 -0
- rasa/core/channels/inspector/dist/assets/classDiagram-70f12bd4-807a1b27.js +2 -0
- rasa/core/channels/inspector/dist/assets/classDiagram-v2-f2320105-5238dcdb.js +2 -0
- rasa/core/channels/inspector/dist/assets/clone-21f8a43d.js +1 -0
- rasa/core/channels/inspector/dist/assets/{createText-62fc7601-89c73b31.js → createText-2e5e7dd3-75dfaa67.js} +1 -1
- rasa/core/channels/inspector/dist/assets/edges-e0da2a9e-df20501d.js +4 -0
- rasa/core/channels/inspector/dist/assets/{erDiagram-9d236eb7-907e0440.js → erDiagram-9861fffd-13cf4797.js} +4 -4
- rasa/core/channels/inspector/dist/assets/flowDb-956e92f1-a4991264.js +10 -0
- rasa/core/channels/inspector/dist/assets/flowDiagram-66a62f08-ccecf773.js +4 -0
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-96b9c2cf-5c8ce12d.js +1 -0
- rasa/core/channels/inspector/dist/assets/flowchart-elk-definition-4a651766-b5801783.js +139 -0
- rasa/core/channels/inspector/dist/assets/ganttDiagram-c361ad54-161e079a.js +257 -0
- rasa/core/channels/inspector/dist/assets/gitGraphDiagram-72cf32ee-f38e86a4.js +70 -0
- rasa/core/channels/inspector/dist/assets/graph-be6ef5d8.js +1 -0
- rasa/core/channels/inspector/dist/assets/index-3862675e-d9ce8994.js +1 -0
- rasa/core/channels/inspector/dist/assets/{index-e793d777.js → index-7794b245.js} +200 -195
- rasa/core/channels/inspector/dist/assets/{infoDiagram-736b4530-8ceba4db.js → infoDiagram-f8f76790-5000a3dc.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{journeyDiagram-df861f2b-960d3809.js → journeyDiagram-49397b02-8ef0a17a.js} +4 -4
- rasa/core/channels/inspector/dist/assets/katex-498eb57e.js +261 -0
- rasa/core/channels/inspector/dist/assets/layout-d649bc98.js +1 -0
- rasa/core/channels/inspector/dist/assets/{line-eeccc4e2.js → line-95add810.js} +1 -1
- rasa/core/channels/inspector/dist/assets/linear-f6025094.js +1 -0
- rasa/core/channels/inspector/dist/assets/mindmap-definition-fc14e90a-2e8531c4.js +312 -0
- rasa/core/channels/inspector/dist/assets/{pieDiagram-dbbf0591-dc9b5e1b.js → pieDiagram-8a3498a8-918adfdb.js} +7 -7
- rasa/core/channels/inspector/dist/assets/{quadrantDiagram-4d7f4fd6-a08cba6d.js → quadrantDiagram-120e2f19-cbd01797.js} +1 -1
- rasa/core/channels/inspector/dist/assets/{requirementDiagram-6fc4c22a-87242b9e.js → requirementDiagram-deff3bca-6a8b877b.js} +2 -2
- rasa/core/channels/inspector/dist/assets/sankeyDiagram-04a897e0-c377c3fe.js +8 -0
- rasa/core/channels/inspector/dist/assets/sequenceDiagram-704730f1-ab9e9b7f.js +122 -0
- rasa/core/channels/inspector/dist/assets/stateDiagram-587899a1-5e6ae67d.js +1 -0
- rasa/core/channels/inspector/dist/assets/stateDiagram-v2-d93cdb3a-40643476.js +1 -0
- rasa/core/channels/inspector/dist/assets/{styles-9c745c82-cef936a6.js → styles-6aaf32cf-afb8d108.js} +1 -1
- rasa/core/channels/inspector/dist/assets/styles-9a916d00-7edc9423.js +160 -0
- rasa/core/channels/inspector/dist/assets/styles-c10674c1-c1d8f7e9.js +116 -0
- rasa/core/channels/inspector/dist/assets/svgDrawCommon-08f97a94-f494b2ef.js +1 -0
- rasa/core/channels/inspector/dist/assets/{timeline-definition-5b62e21b-0d39bdb2.js → timeline-definition-85554ec2-11c7cdd0.js} +3 -3
- rasa/core/channels/inspector/dist/assets/{xychartDiagram-2b33534f-a03fa445.js → xychartDiagram-e933f94c-3f191ec1.js} +3 -3
- rasa/core/channels/inspector/dist/index.html +1 -1
- rasa/core/channels/inspector/package.json +10 -3
- rasa/core/channels/inspector/yarn.lock +89 -99
- rasa/core/channels/studio_chat.py +14 -0
- rasa/core/nlg/contextual_response_rephraser.py +2 -1
- rasa/core/policies/enterprise_search_policy.py +2 -1
- rasa/core/processor.py +3 -3
- rasa/dialogue_understanding/generator/llm_based_command_generator.py +2 -1
- rasa/dialogue_understanding_test/command_metric_calculation.py +8 -4
- rasa/dialogue_understanding_test/du_test_result.py +9 -0
- rasa/e2e_test/assertions.py +203 -174
- rasa/e2e_test/assertions_schema.yml +6 -0
- rasa/e2e_test/constants.py +16 -1
- rasa/e2e_test/e2e_config.py +102 -41
- rasa/e2e_test/e2e_config_schema.yml +28 -10
- rasa/e2e_test/llm_judge_prompts/answer_relevance_prompt_template.jinja2 +89 -0
- rasa/e2e_test/llm_judge_prompts/groundedness_prompt_template.jinja2 +165 -0
- rasa/e2e_test/utils/generative_assertions.py +243 -0
- rasa/server.py +3 -1
- rasa/shared/nlu/constants.py +1 -0
- rasa/shared/providers/llm/llm_response.py +21 -1
- rasa/tracing/instrumentation/attribute_extractors.py +23 -7
- rasa/utils/common.py +0 -14
- rasa/version.py +1 -1
- {rasa_pro-3.12.0.dev4.dist-info → rasa_pro-3.12.0.dev6.dist-info}/METADATA +1 -3
- {rasa_pro-3.12.0.dev4.dist-info → rasa_pro-3.12.0.dev6.dist-info}/RECORD +73 -64
- rasa/core/channels/inspector/dist/assets/arc-632a63ec.js +0 -1
- rasa/core/channels/inspector/dist/assets/c4Diagram-d0fbc5ce-081e0df4.js +0 -10
- rasa/core/channels/inspector/dist/assets/classDiagram-936ed81e-3df0afc2.js +0 -2
- rasa/core/channels/inspector/dist/assets/classDiagram-v2-c3cb15f1-8c5ed31e.js +0 -2
- rasa/core/channels/inspector/dist/assets/edges-f2ad444c-4fc48c3e.js +0 -4
- rasa/core/channels/inspector/dist/assets/flowDb-1972c806-9ec53a3c.js +0 -6
- rasa/core/channels/inspector/dist/assets/flowDiagram-7ea5b25a-41da787a.js +0 -4
- rasa/core/channels/inspector/dist/assets/flowDiagram-v2-855bc5b3-8bea338b.js +0 -1
- rasa/core/channels/inspector/dist/assets/flowchart-elk-definition-abe16c3d-ce370633.js +0 -139
- rasa/core/channels/inspector/dist/assets/ganttDiagram-9b5ea136-90a36523.js +0 -266
- rasa/core/channels/inspector/dist/assets/gitGraphDiagram-99d0ae7c-41e1aa3f.js +0 -70
- rasa/core/channels/inspector/dist/assets/index-2c4b9a3b-e6f2af62.js +0 -1
- rasa/core/channels/inspector/dist/assets/layout-498807d8.js +0 -1
- rasa/core/channels/inspector/dist/assets/linear-8a078617.js +0 -1
- rasa/core/channels/inspector/dist/assets/mindmap-definition-beec6740-396d17dd.js +0 -109
- rasa/core/channels/inspector/dist/assets/sankeyDiagram-8f13d901-53f6f391.js +0 -8
- rasa/core/channels/inspector/dist/assets/sequenceDiagram-b655622a-715c9c20.js +0 -122
- rasa/core/channels/inspector/dist/assets/stateDiagram-59f0c015-2e8fb31f.js +0 -1
- rasa/core/channels/inspector/dist/assets/stateDiagram-v2-2b26beab-7e2d2aa0.js +0 -1
- rasa/core/channels/inspector/dist/assets/styles-080da4f6-4420cea6.js +0 -110
- rasa/core/channels/inspector/dist/assets/styles-3dcbcfbf-28676cf4.js +0 -159
- rasa/core/channels/inspector/dist/assets/svgDrawCommon-4835440b-151251e9.js +0 -1
- {rasa_pro-3.12.0.dev4.dist-info → rasa_pro-3.12.0.dev6.dist-info}/NOTICE +0 -0
- {rasa_pro-3.12.0.dev4.dist-info → rasa_pro-3.12.0.dev6.dist-info}/WHEEL +0 -0
- {rasa_pro-3.12.0.dev4.dist-info → rasa_pro-3.12.0.dev6.dist-info}/entry_points.txt +0 -0
rasa/e2e_test/utils/generative_assertions.py
ADDED
@@ -0,0 +1,243 @@
+import json
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+
+import jsonschema
+import numpy as np
+import structlog
+from pydantic import BaseModel, ConfigDict
+
+from rasa.core.constants import (
+    UTTER_SOURCE_METADATA_KEY,
+)
+from rasa.e2e_test.constants import (
+    KEY_JUSTIFICATION,
+    KEY_SCORE,
+)
+from rasa.e2e_test.e2e_config import LLMJudgeConfig
+from rasa.shared.constants import MODEL_CONFIG_KEY, OPENAI_PROVIDER, PROVIDER_CONFIG_KEY
+from rasa.shared.core.events import BotUttered
+from rasa.shared.exceptions import RasaException
+from rasa.shared.utils.llm import DEFAULT_OPENAI_EMBEDDING_MODEL_NAME, embedder_factory
+
+if TYPE_CHECKING:
+    from rasa.shared.core.events import Event
+
+
+structlogger = structlog.get_logger()
+
+
+DEFAULT_EMBEDDINGS_CONFIG = {
+    PROVIDER_CONFIG_KEY: OPENAI_PROVIDER,
+    MODEL_CONFIG_KEY: DEFAULT_OPENAI_EMBEDDING_MODEL_NAME,
+}
+
+ELIGIBLE_UTTER_SOURCE_METADATA = [
+    "EnterpriseSearchPolicy",
+    "ContextualResponseRephraser",
+    "IntentlessPolicy",
+]
+
+GROUNDEDNESS_JSON_SUB_SCHEMA = {
+    "properties": {
+        "statements": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "statement": {"type": "string"},
+                    "score": {"type": "number"},
+                    "justification": {"type": "string"},
+                },
+                "required": ["statement", "score", "justification"],
+            },
+        }
+    },
+    "required": ["statements"],
+}
+
+ANSWER_RELEVANCE_JSON_SUB_SCHEMA = {
+    "properties": {
+        "question_variations": {
+            "type": "array",
+            "items": {"type": "string"},
+        },
+    },
+    "required": ["question_variations"],
+}
+
+LLM_JUDGE_OUTPUT_JSON_SCHEMA = {
+    "type": "object",
+    "oneOf": [
+        GROUNDEDNESS_JSON_SUB_SCHEMA,
+        ANSWER_RELEVANCE_JSON_SUB_SCHEMA,
+    ],
+}
+
+
+class ScoreInputs(BaseModel):
+    """Input data for the score calculation."""
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    threshold: float
+    matching_event: BotUttered
+    user_question: str
+    llm_judge_config: LLMJudgeConfig
+
+
+def _calculate_similarity(
+    user_question: str,
+    generated_questions: List[str],
+    llm_judge_config: LLMJudgeConfig,
+) -> np.ndarray:
+    """Calculate the cosine similarity between the user question and the generated questions."""  # noqa: E501
+    embedding_client = embedder_factory(
+        llm_judge_config.embeddings_config_as_dict, DEFAULT_EMBEDDINGS_CONFIG
+    )
+
+    user_question_embedding_response = embedding_client.embed([user_question])
+    question_vector = np.asarray(user_question_embedding_response.data[0]).reshape(
+        1, -1
+    )
+
+    gen_questions_embedding_response = embedding_client.embed(generated_questions)
+    generated_questions_vectors = np.asarray(
+        gen_questions_embedding_response.data
+    ).reshape(len(generated_questions), -1)
+
+    # calculate norm
+    question_vector_norm = np.linalg.norm(question_vector, axis=1)
+    generated_questions_vectors_norm = np.linalg.norm(
+        generated_questions_vectors, axis=1
+    )
+    norm = generated_questions_vectors_norm * question_vector_norm
+    norm = np.maximum(norm, 1e-10)
+
+    # calculate the dot product
+    dot_product = np.dot(generated_questions_vectors, question_vector.T).reshape(-1)
+
+    # calculate and return cosine similarity
+    return dot_product / norm
+
+
+def calculate_relevance_score(
+    processed_output: List[Union[str, Dict[str, Any]]],
+    score_inputs: ScoreInputs,
+) -> Tuple[float, str]:
+    """Calculate the score based on the LLM response."""
+    user_question = score_inputs.user_question
+    llm_judge_config = score_inputs.llm_judge_config
+
+    generated_questions = [output for output in processed_output]
+    if all(not question for question in generated_questions):
+        score = 0.0
+        error_justification = "No relevant questions were generated"
+        return score, error_justification
+
+    cosine_sim = _calculate_similarity(
+        user_question, generated_questions, llm_judge_config
+    )
+
+    score = cosine_sim.mean()
+
+    if score < score_inputs.threshold:
+        error_justifications = [
+            f"Question '{generated_questions[i]}' "
+            f"has a cosine similarity score of '{round(cosine_sim[i], 2)}' "
+            f"with the user question '{user_question}'"
+            for i in range(len(generated_questions))
+        ]
+        error_justification = ", ".join(error_justifications)
+        return score, error_justification
+
+    return score, ""
+
+
+def calculate_groundedness_score(
+    processed_output: List[Any],
+    score_inputs: ScoreInputs,
+) -> Tuple[float, str]:
+    """Calculate the score based on the LLM response."""
+    matching_event = score_inputs.matching_event
+
+    total_statements = len(processed_output)
+    correct_statements = sum([output.get(KEY_SCORE, 0) for output in processed_output])
+    score = correct_statements / total_statements
+
+    structlogger.debug(
+        "generative_response_is_grounded_assertion.run_results",
+        matching_event=repr(matching_event),
+        score=score,
+        justification=f"There were {correct_statements} correct statements "
+        f"out of {total_statements} total extracted statements.",
+    )
+
+    if score < score_inputs.threshold:
+        justifications = [
+            output.get(KEY_JUSTIFICATION, "")
+            for output in processed_output
+            if output.get(KEY_SCORE, 0) == 0
+        ]
+        justification = ", ".join(justifications).replace(".", "")
+
+        error_justification = (
+            f"There were {total_statements - correct_statements} "
+            f"incorrect statements out of {total_statements} total "
+            f"extracted statements. The justifications for "
+            f"these statements include: {justification}"
+        )
+
+        return score, error_justification
+
+    return score, ""
+
+
+def _find_matching_generative_events(
+    turn_events: List["Event"], utter_source: Optional[str]
+) -> List[BotUttered]:
+    """Find the matching events for the generative response assertions."""
+    if utter_source is None:
+        return [
+            event
+            for event in turn_events
+            if isinstance(event, BotUttered)
+            and event.metadata.get(UTTER_SOURCE_METADATA_KEY)
+            in ELIGIBLE_UTTER_SOURCE_METADATA
+        ]
+
+    return [
+        event
+        for event in turn_events
+        if isinstance(event, BotUttered)
+        and event.metadata.get(UTTER_SOURCE_METADATA_KEY) == utter_source
+    ]
+
+
+def _parse_llm_output(llm_response: str, bot_message: str) -> Dict[str, Any]:
+    """Parse the LLM output."""
+    llm_output = (
+        llm_response.replace("```json\n", "").replace("```", "").replace("\n", "")
+    )
+    try:
+        parsed_llm_output = json.loads(llm_output)
+    except json.JSONDecodeError as exc:
+        raise RasaException(
+            f"Failed to parse the LLM Judge response '{llm_output}' for "
+            f"the generative bot message '{bot_message}': {exc}"
+        )
+
+    return parsed_llm_output
+
+
+def _validate_parsed_llm_output(
+    parsed_llm_output: Dict[str, Any], bot_message: str
+) -> None:
+    """Validate the parsed LLM output."""
+    try:
+        jsonschema.validate(parsed_llm_output, LLM_JUDGE_OUTPUT_JSON_SCHEMA)
+    except jsonschema.ValidationError as exc:
+        raise RasaException(
+            f"Failed to validate the LLM Judge json response "
+            f"'{parsed_llm_output}' for the generative bot message "
+            f"'{bot_message}'. Error: {exc}"
+        )
rasa/server.py
CHANGED
@@ -272,7 +272,9 @@ def requires_auth(
         raise ErrorResponse(
             HTTPStatus.UNAUTHORIZED,
             "NotAuthenticated",
-            "User is not authenticated.",
+            "User is not authenticated. ",
+            "Please make sure the use of token is supported and that "
+            "the supplied token is valid.",
             help_url=_docs(
                 "/user-guide/configuring-http-api/#security-considerations"
             ),
rasa/shared/nlu/constants.py
CHANGED
@@ -9,6 +9,7 @@ KEY_SYSTEM_PROMPT = "system_prompt"
 KEY_LLM_RESPONSE_METADATA = "llm_response_metadata"
 KEY_PROMPT_NAME = "prompt_name"
 KEY_COMPONENT_NAME = "component_name"
+KEY_LATENCY = "latency"
 LLM_COMMANDS = "llm_commands"  # needed for fine-tuning
 LLM_PROMPT = "llm_prompt"  # needed for fine-tuning
 FLOWS_FROM_SEMANTIC_SEARCH = "flows_from_semantic_search"
rasa/shared/providers/llm/llm_response.py
CHANGED
@@ -1,5 +1,7 @@
+import functools
+import time
 from dataclasses import asdict, dataclass, field
-from typing import Any, Dict, List, Optional, Text, Union
+from typing import Any, Awaitable, Callable, Dict, List, Optional, Text, Union
 
 import structlog
 
@@ -57,6 +59,9 @@ class LLMResponse:
     """Optional dictionary for storing additional information related to the
     completion that may not be covered by other fields."""
 
+    latency: Optional[float] = None
+    """Optional field to store the latency of the LLM API call."""
+
     @classmethod
     def from_dict(cls, data: Dict[Text, Any]) -> "LLMResponse":
         """Creates an LLMResponse from a dictionary."""
@@ -70,6 +75,7 @@ class LLMResponse:
             model=data.get("model"),
             usage=usage_obj,
             additional_info=data.get("additional_info"),
+            latency=data.get("latency"),
         )
 
     @classmethod
@@ -87,3 +93,17 @@ class LLMResponse:
         if self.usage:
             result["usage"] = self.usage.to_dict()
         return result
+
+
+def measure_llm_latency(
+    func: Callable[..., Awaitable[Optional[LLMResponse]]],
+) -> Callable[..., Awaitable[Optional[LLMResponse]]]:
+    @functools.wraps(func)
+    async def wrapper(*args: Any, **kwargs: Any) -> Optional[LLMResponse]:
+        start = time.perf_counter()
+        result: Optional[LLMResponse] = await func(*args, **kwargs)
+        if result:
+            result.latency = time.perf_counter() - start
+        return result
+
+    return wrapper
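Note: measure_llm_latency is a decorator for async LLM calls that stamps the wall-clock duration onto the returned response. A minimal runnable sketch of the same pattern, using a stand-in Response dataclass since LLMResponse's full constructor is not shown in this diff:

import asyncio
import functools
import time
from dataclasses import dataclass
from typing import Any, Awaitable, Callable, Optional

@dataclass
class Response:  # stand-in for LLMResponse, for illustration only
    text: str
    latency: Optional[float] = None

def measure_latency(
    func: Callable[..., Awaitable[Optional[Response]]],
) -> Callable[..., Awaitable[Optional[Response]]]:
    @functools.wraps(func)
    async def wrapper(*args: Any, **kwargs: Any) -> Optional[Response]:
        start = time.perf_counter()
        result = await func(*args, **kwargs)
        if result:  # only stamp latency when the call produced a response
            result.latency = time.perf_counter() - start
        return result
    return wrapper

@measure_latency
async def fake_llm_call(prompt: str) -> Optional[Response]:
    await asyncio.sleep(0.05)  # stands in for the provider round-trip
    return Response(text=f"echo: {prompt}")

print(asyncio.run(fake_llm_call("hi")).latency)  # roughly 0.05 seconds

Using time.perf_counter() rather than time.time() is the right choice here: it is a monotonic clock intended for measuring elapsed intervals.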
rasa/tracing/instrumentation/attribute_extractors.py
CHANGED
@@ -24,7 +24,6 @@ from rasa.dialogue_understanding.stack.dialogue_stack import DialogueStack
 from rasa.dialogue_understanding_test.du_test_result import (
     KEY_TEST_CASES_ACCURACY,
     KEY_USER_UTTERANCES_ACCURACY,
-    OUTPUT_COMMAND_METRICS,
     OUTPUT_NAMES_OF_FAILED_TESTS,
     OUTPUT_NAMES_OF_PASSED_TESTS,
     OUTPUT_NUMBER_OF_FAILED_TESTS,
@@ -44,6 +43,7 @@ from rasa.shared.constants import (
     LLM_CONFIG_KEY,
     MODEL_CONFIG_KEY,
     MODEL_GROUP_ID_CONFIG_KEY,
+    MODELS_CONFIG_KEY,
     PROVIDER_CONFIG_KEY,
     TIMEOUT_CONFIG_KEY,
 )
@@ -614,12 +614,28 @@ def extract_attrs_for_du_print_test_results(
         ),
     }
     if test_suite_result.command_metrics:
-
-
-
-
-
-
+        for (
+            command_name,
+            command_metric,
+        ) in test_suite_result.command_metrics.items():
+            # OpenTelemetry / Honeycomb doesn't support dictionaries/json values,
+            # so we need to set the values as separate attributes
+            for metric_name, value in command_metric.as_dict().items():
+                attributes_dict[f"{command_name}_{metric_name}"] = value
+
+    if test_suite_result.llm_config:
+        # check if model group syntax is used
+        if MODELS_CONFIG_KEY in test_suite_result.llm_config:
+            for idx, model_group in enumerate(
+                test_suite_result.llm_config[MODELS_CONFIG_KEY]
+            ):
+                for key, value in model_group.items():
+                    if value is not None:
+                        attributes_dict[f"llm_config_{idx}_{key}"] = value
+        else:
+            for key, value in test_suite_result.llm_config.items():
+                attributes_dict[f"llm_config_0_{key}"] = value
+
     return attributes_dict
 
 
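Note: OpenTelemetry span attributes must be scalar values (or homogeneous sequences of scalars), not dictionaries, which is why the new code flattens each metrics dictionary into one prefixed attribute per key. A minimal sketch of that flattening, with made-up command names and metric values:

from typing import Any, Dict

def flatten_for_span(command_metrics: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:
    """Produce one scalar attribute per (command, metric) pair, e.g. "set_slot_tp"."""
    attributes: Dict[str, Any] = {}
    for command_name, metrics in command_metrics.items():
        for metric_name, value in metrics.items():
            attributes[f"{command_name}_{metric_name}"] = value
    return attributes

# made-up metrics for illustration
print(flatten_for_span({"set_slot": {"tp": 10, "fp": 2}, "clarify": {"tp": 3, "fp": 0}}))
# {'set_slot_tp': 10, 'set_slot_fp': 2, 'clarify_tp': 3, 'clarify_fp': 0}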
rasa/utils/common.py
CHANGED
@@ -35,7 +35,6 @@ from rasa.constants import (
     ENV_LOG_LEVEL_KAFKA,
     ENV_LOG_LEVEL_LIBRARIES,
     ENV_LOG_LEVEL_MATPLOTLIB,
-    ENV_LOG_LEVEL_MLFLOW,
     ENV_LOG_LEVEL_RABBITMQ,
 )
 from rasa.shared.constants import DEFAULT_LOG_LEVEL, ENV_LOG_LEVEL, TCP_PROTOCOL
@@ -396,19 +395,6 @@ def update_faker_log_level(library_log_level: Text) -> None:
     logging.getLogger("faker").propagate = False
 
 
-def update_mlflow_log_level() -> None:
-    """Set the log level of mlflow.
-
-    Uses the library specific log level or the general libraries log level.
-    """
-    library_log_level = os.environ.get(
-        ENV_LOG_LEVEL_LIBRARIES, DEFAULT_LOG_LEVEL_LIBRARIES
-    )
-    log_level = os.environ.get(ENV_LOG_LEVEL_MLFLOW, library_log_level)
-    logging.getLogger("mlflow").setLevel(log_level)
-    logging.getLogger("mlflow").propagate = False
-
-
 def sort_list_of_dicts_by_first_key(dicts: List[Dict]) -> List[Dict]:
     """Sorts a list of dictionaries by their first key."""
     return sorted(dicts, key=lambda d: next(iter(d.keys())))
rasa/version.py
CHANGED
-__version__ = "3.12.0.dev4"
+__version__ = "3.12.0.dev6"

{rasa_pro-3.12.0.dev4.dist-info → rasa_pro-3.12.0.dev6.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: rasa-pro
-Version: 3.12.0.dev4
+Version: 3.12.0.dev6
 Summary: State-of-the-art open-core Conversational AI framework for Enterprises that natively leverages generative AI for effortless assistant development.
 Home-page: https://rasa.com
 Keywords: nlp,machine-learning,machine-learning-library,bot,bots,botkit,rasa conversational-agents,conversational-ai,chatbot,chatbot-framework,bot-framework
@@ -20,7 +20,6 @@ Provides-Extra: full
 Provides-Extra: gh-release-notes
 Provides-Extra: jieba
 Provides-Extra: metal
-Provides-Extra: mlflow
 Provides-Extra: spacy
 Provides-Extra: transformers
 Requires-Dist: CacheControl (>=0.14.2,<0.15.0)
@@ -67,7 +66,6 @@ Requires-Dist: langchain-community (>=0.2.0,<0.3.0)
 Requires-Dist: litellm (>=1.52.6,<1.53.0)
 Requires-Dist: matplotlib (>=3.7,<3.8)
 Requires-Dist: mattermostwrapper (>=2.2,<2.3)
-Requires-Dist: mlflow (>=2.15.1,<3.0.0) ; extra == "mlflow"
 Requires-Dist: networkx (>=3.1,<3.2)
 Requires-Dist: numpy (>=1.26.4,<1.27.0)
 Requires-Dist: openai (>=1.55.3,<1.56.0)