rasa-pro 3.14.1__py3-none-any.whl → 3.15.0.dev20251027__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rasa/builder/config.py +4 -0
- rasa/builder/copilot/copilot.py +28 -9
- rasa/builder/copilot/models.py +251 -32
- rasa/builder/document_retrieval/inkeep_document_retrieval.py +2 -0
- rasa/builder/download.py +1 -1
- rasa/builder/evaluator/__init__.py +0 -0
- rasa/builder/evaluator/constants.py +15 -0
- rasa/builder/evaluator/copilot_executor.py +89 -0
- rasa/builder/evaluator/dataset/models.py +173 -0
- rasa/builder/evaluator/exceptions.py +4 -0
- rasa/builder/evaluator/response_classification/__init__.py +0 -0
- rasa/builder/evaluator/response_classification/constants.py +66 -0
- rasa/builder/evaluator/response_classification/evaluator.py +346 -0
- rasa/builder/evaluator/response_classification/langfuse_runner.py +463 -0
- rasa/builder/evaluator/response_classification/models.py +61 -0
- rasa/builder/evaluator/scripts/__init__.py +0 -0
- rasa/builder/evaluator/scripts/run_response_classification_evaluator.py +152 -0
- rasa/builder/service.py +101 -24
- rasa/builder/telemetry/__init__.py +0 -0
- rasa/builder/telemetry/copilot_langfuse_telemetry.py +384 -0
- rasa/builder/{copilot/telemetry.py → telemetry/copilot_segment_telemetry.py} +21 -3
- rasa/constants.py +1 -0
- rasa/core/policies/flows/flow_executor.py +20 -6
- rasa/core/run.py +15 -4
- rasa/dialogue_understanding/generator/single_step/compact_llm_command_generator.py +15 -7
- rasa/dialogue_understanding/generator/single_step/search_ready_llm_command_generator.py +15 -8
- rasa/e2e_test/e2e_config.py +4 -3
- rasa/engine/recipes/default_components.py +16 -6
- rasa/graph_components/validators/default_recipe_validator.py +10 -4
- rasa/nlu/classifiers/diet_classifier.py +2 -0
- rasa/shared/core/slots.py +55 -24
- rasa/shared/utils/common.py +9 -1
- rasa/utils/common.py +9 -0
- rasa/utils/endpoints.py +2 -0
- rasa/utils/installation_utils.py +111 -0
- rasa/utils/tensorflow/callback.py +2 -0
- rasa/utils/train_utils.py +2 -0
- rasa/version.py +1 -1
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0.dev20251027.dist-info}/METADATA +4 -2
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0.dev20251027.dist-info}/RECORD +43 -28
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0.dev20251027.dist-info}/NOTICE +0 -0
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0.dev20251027.dist-info}/WHEEL +0 -0
- {rasa_pro-3.14.1.dist-info → rasa_pro-3.15.0.dev20251027.dist-info}/entry_points.txt +0 -0
rasa/builder/config.py
CHANGED
@@ -13,6 +13,10 @@ OPENAI_VECTOR_STORE_ID = os.getenv(
 )
 OPENAI_MAX_VECTOR_RESULTS = int(os.getenv("OPENAI_MAX_VECTOR_RESULTS", "10"))
 OPENAI_TIMEOUT = int(os.getenv("OPENAI_TIMEOUT", "30"))
+# OpenAI Token Pricing Configuration (per 1,000 tokens)
+COPILOT_INPUT_TOKEN_PRICE = float(os.getenv("COPILOT_INPUT_TOKEN_PRICE", "0.002"))
+COPILOT_OUTPUT_TOKEN_PRICE = float(os.getenv("COPILOT_OUTPUT_TOKEN_PRICE", "0.0005"))
+COPILOT_CACHED_TOKEN_PRICE = float(os.getenv("COPILOT_CACHED_TOKEN_PRICE", "0.002"))
 
 # Server Configuration
 BUILDER_SERVER_HOST = os.getenv("SERVER_HOST", "0.0.0.0")
rasa/builder/copilot/copilot.py
CHANGED
@@ -42,6 +42,7 @@ from rasa.builder.exceptions import (
     DocumentRetrievalError,
 )
 from rasa.builder.shared.tracker_context import TrackerContext
+from rasa.builder.telemetry.copilot_langfuse_telemetry import CopilotLangfuseTelemetry
 from rasa.shared.constants import PACKAGE_NAME
 
 structlogger = structlog.get_logger()
@@ -72,7 +73,11 @@ class Copilot:
         )
 
         # The final stream chunk includes usage statistics.
-        self.usage_statistics = UsageStatistics(
+        self.usage_statistics = UsageStatistics(
+            input_token_price=config.COPILOT_INPUT_TOKEN_PRICE,
+            output_token_price=config.COPILOT_OUTPUT_TOKEN_PRICE,
+            cached_token_price=config.COPILOT_CACHED_TOKEN_PRICE,
+        )
 
     @asynccontextmanager
     async def _get_client(self) -> AsyncGenerator[openai.AsyncOpenAI, None]:
@@ -94,6 +99,16 @@ class Copilot:
                 error=str(exc),
             )
 
+    @property
+    def llm_config(self) -> Dict[str, Any]:
+        """The LLM config used to generate the response."""
+        return {
+            "model": config.OPENAI_MODEL,
+            "temperature": config.OPENAI_TEMPERATURE,
+            "stream": True,
+            "stream_options": {"include_usage": True},
+        }
+
     async def search_rasa_documentation(
         self,
         context: CopilotContext,
@@ -108,7 +123,9 @@ class Copilot:
         """
         try:
             query = self._create_documentation_search_query(context)
-
+            documents = await self._inkeep_document_retrieval.retrieve_documents(query)
+            # TODO: Log documentation retrieval to Langfuse
+            return documents
         except DocumentRetrievalError as e:
             structlogger.error(
                 "copilot.search_rasa_documentation.error",
@@ -145,11 +162,12 @@ class Copilot:
             Exception: If an unexpected error occurs.
         """
         relevant_documents = await self.search_rasa_documentation(context)
-        messages = await self._build_messages(context, relevant_documents)
         tracker_event_attachments = self._extract_tracker_event_attachments(
             context.copilot_chat_history[-1]
        )
+        messages = await self._build_messages(context, relevant_documents)
 
+        # TODO: Delete this after Langfuse is implemented
         support_evidence = CopilotGenerationContext(
             relevant_documents=relevant_documents,
             system_message=messages[0],
@@ -163,6 +181,7 @@ class Copilot:
             support_evidence,
         )
 
+    @CopilotLangfuseTelemetry.trace_copilot_streaming_generation
     async def _stream_response(
         self, messages: List[Dict[str, Any]]
     ) -> AsyncGenerator[str, None]:
@@ -172,13 +191,10 @@ class Copilot:
         try:
             async with self._get_client() as client:
                 stream = await client.chat.completions.create(
-
-
-                    temperature=config.OPENAI_TEMPERATURE,
-                    stream=True,
-                    stream_options={"include_usage": True},
+                    messages=messages,
+                    **self.llm_config,
                 )
-                async for chunk in stream:
+                async for chunk in stream:  # type: ignore[attr-defined]
                     # The final chunk, which contains the usage statistics,
                     # arrives with an empty `choices` list.
                     if not chunk.choices:
@@ -189,6 +205,7 @@ class Copilot:
                     delta = chunk.choices[0].delta
                     if delta and delta.content:
                         yield delta.content
+
         except openai.OpenAIError as e:
             structlogger.exception("copilot.stream_response.api_error", error=str(e))
             raise CopilotStreamError(
@@ -559,4 +576,6 @@ class Copilot:
         """Extract the tracker event attachments from the message."""
         if not isinstance(message, UserChatMessage):
             return []
+        # TODO: Log tracker event attachments to Langfuse only in the case of the
+        # User chat message.
         return message.get_content_blocks_by_type(EventContent)
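The `messages=messages, **self.llm_config` call above is plain dict unpacking over the new `llm_config` property. A minimal standalone sketch of the equivalent streaming call and chunk handling; it assumes `OPENAI_API_KEY` is set and `rasa.builder.config` is importable, and `stream_completion` is an illustrative helper, not part of the package:

from typing import Any, Dict, List

import openai

from rasa.builder import config


async def stream_completion(messages: List[Dict[str, Any]]) -> str:
    """Sketch of the call Copilot._stream_response now builds via **self.llm_config."""
    client = openai.AsyncOpenAI()
    stream = await client.chat.completions.create(
        messages=messages,
        model=config.OPENAI_MODEL,
        temperature=config.OPENAI_TEMPERATURE,
        stream=True,
        stream_options={"include_usage": True},
    )
    chunks: List[str] = []
    async for chunk in stream:
        # The final chunk carries only usage statistics and arrives with an
        # empty `choices` list, so skip it here.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if delta and delta.content:
            chunks.append(delta.content)
    return "".join(chunks)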
rasa/builder/copilot/models.py
CHANGED
@@ -3,6 +3,7 @@ from enum import Enum
 from typing import Any, Dict, List, Literal, Optional, Type, TypeVar, Union
 
 import structlog
+from openai.types.chat import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from pydantic import (
     BaseModel,
@@ -343,6 +344,55 @@ ChatMessage = Union[
 ]
 
 
+def create_chat_message_from_dict(message_data: Dict[str, Any]) -> ChatMessage:
+    """Parse a single chat message dictionary into a ChatMessage object.
+
+    This utility function manually parses a chat message dictionary into the
+    appropriate ChatMessage type based on its role field.
+
+    Args:
+        message_data: Dictionary containing chat message data
+
+    Returns:
+        Parsed ChatMessage object
+
+    Raises:
+        ValueError: If an unknown role is encountered
+
+    Example:
+        >>> message_data = {
+        ...     "role": "user",
+        ...     "content": [{"type": "text", "text": "Hello"}]
+        ... }
+        >>> message = parse_chat_message_from_dict(message_data)
+        >>> isinstance(message, UserChatMessage)
+        True
+        >>> message.role
+        'user'
+    """
+    available_roles = [ROLE_USER, ROLE_COPILOT, ROLE_COPILOT_INTERNAL]
+    role = message_data.get("role")
+
+    if role == ROLE_USER:
+        return UserChatMessage(**message_data)
+    elif role == ROLE_COPILOT:
+        return CopilotChatMessage(**message_data)
+    elif role == ROLE_COPILOT_INTERNAL:
+        return InternalCopilotRequestChatMessage(**message_data)
+    else:
+        message = (
+            f"Unknown role '{role}' in chat message. "
+            f"Available roles are: {', '.join(available_roles)}."
+        )
+        structlogger.error(
+            "models.create_chat_message_from_dict.unknown_role",
+            event_info=message,
+            role=role,
+            available_roles=available_roles,
+        )
+        raise ValueError(message)
+
+
 class CopilotContext(BaseModel):
     """Model containing the context used by the copilot to generate a response."""
 
@@ -390,37 +440,40 @@ class CopilotRequest(BaseModel):
 
     @field_validator("copilot_chat_history", mode="before")
     @classmethod
-    def parse_chat_history(
+    def parse_chat_history(
+        cls, v: Union[List[Dict[str, Any]], List[ChatMessage]]
+    ) -> List[ChatMessage]:
         """Manually parse chat history messages based on role field."""
+        # If already parsed ChatMessage objects, return them as-is
+        if (
+            v
+            and isinstance(v, list)
+            and all(isinstance(item, ChatMessage) for item in v)
+        ):
+            return v  # type: ignore[return-value]
+
+        # Check for mixed types (some ChatMessage, some not)
+        if (
+            v
+            and isinstance(v, list)
+            and any(isinstance(item, ChatMessage) for item in v)
+        ):
+            message = (
+                "Mixed types in copilot_chat_history: cannot mix ChatMessage objects"
+                "with other types."
+            )
+            structlog.get_logger().error(
+                "copilot_request.parse_chat_history.mixed_types",
+                event_info=message,
+                chat_history_types=[type(item) for item in v],
+            )
+            raise ValueError(message)
+
+        # Otherwise, parse from dictionaries
         parsed_messages: List[ChatMessage] = []
-        available_roles = [ROLE_USER, ROLE_COPILOT, ROLE_COPILOT_INTERNAL]
         for message_data in v:
-
-
-            if role == ROLE_USER:
-                parsed_messages.append(UserChatMessage(**message_data))
-
-            elif role == ROLE_COPILOT:
-                parsed_messages.append(CopilotChatMessage(**message_data))
-
-            elif role == ROLE_COPILOT_INTERNAL:
-                parsed_messages.append(
-                    InternalCopilotRequestChatMessage(**message_data)
-                )
-
-            else:
-                message = (
-                    f"Unknown role '{role}' in chat message. "
-                    f"Available roles are: {', '.join(available_roles)}."
-                )
-                structlogger.error(
-                    "copilot_request.parse_chat_history.unknown_role",
-                    event_info=message,
-                    role=role,
-                    available_roles=available_roles,
-                )
-                raise ValueError(message)
-
+            chat_message = create_chat_message_from_dict(message_data)
+            parsed_messages.append(chat_message)
         return parsed_messages
 
     @property
@@ -612,16 +665,171 @@ class TrainingErrorLog(CopilotOutput):
 
 
 class UsageStatistics(BaseModel):
-
-
-
-
+    """Usage statistics for a copilot generation."""
+
+    # Token usage statistics
+    prompt_tokens: Optional[int] = Field(
+        default=None,
+        description=(
+            "Total number of prompt tokens used to generate completion. "
+            "Should include cached prompt tokens."
+        ),
+    )
+    completion_tokens: Optional[int] = Field(
+        default=None,
+        description="Number of generated tokens.",
+    )
+    total_tokens: Optional[int] = Field(
+        default=None,
+        description="Total number of tokens used (input + output).",
+    )
+    cached_prompt_tokens: Optional[int] = Field(
+        default=None,
+        description="Number of cached prompt tokens.",
+    )
+    model: Optional[str] = Field(
+        default=None,
+        description="The model used to generate the response.",
+    )
+
+    # Token prices
+    input_token_price: float = Field(
+        default=0.0,
+        description="Price per 1K input tokens in dollars.",
+    )
+    output_token_price: float = Field(
+        default=0.0,
+        description="Price per 1K output tokens in dollars.",
+    )
+    cached_token_price: float = Field(
+        default=0.0,
+        description="Price per 1K cached tokens in dollars.",
+    )
+
+    @property
+    def non_cached_prompt_tokens(self) -> Optional[int]:
+        """Get the non-cached prompt tokens."""
+        if self.cached_prompt_tokens is not None and self.prompt_tokens is not None:
+            return self.prompt_tokens - self.cached_prompt_tokens
+        return self.prompt_tokens
+
+    @property
+    def non_cached_cost(self) -> Optional[float]:
+        """Calculate the non-cached token cost based on configured pricing."""
+        if self.non_cached_prompt_tokens is None:
+            return None
+        if self.non_cached_prompt_tokens == 0:
+            return 0.0
+
+        return (self.non_cached_prompt_tokens / 1000.0) * self.input_token_price
+
+    @property
+    def cached_cost(self) -> Optional[float]:
+        """Calculate the cached token cost based on configured pricing."""
+        if self.cached_prompt_tokens is None:
+            return None
+        if self.cached_prompt_tokens == 0:
+            return 0.0
+
+        return (self.cached_prompt_tokens / 1000.0) * self.cached_token_price
+
+    @property
+    def input_cost(self) -> Optional[float]:
+        """Calculate the input token cost based on configured pricing.
+
+        The calculation takes into account the cached prompt tokens (if available) too.
+        """
+        # If both non-cached and cached costs are None, there's no input cost
+        if self.non_cached_cost is None and self.cached_cost is None:
+            return None
+
+        # If only non-cached cost is available, return it
+        if self.non_cached_cost is not None and self.cached_cost is None:
+            return self.non_cached_cost
+
+        # If only cached cost is available, return it
+        if self.non_cached_cost is None and self.cached_cost is not None:
+            return self.cached_cost
+
+        # If both are available, return the sum
+        return self.non_cached_cost + self.cached_cost  # type: ignore[operator]
+
+    @property
+    def output_cost(self) -> Optional[float]:
+        """Calculate the output token cost based on configured pricing."""
+        if self.completion_tokens is None:
+            return None
+        if self.completion_tokens == 0:
+            return 0.0
+
+        return (self.completion_tokens / 1000.0) * self.output_token_price
+
+    @property
+    def total_cost(self) -> Optional[float]:
+        """Calculate the total cost based on configured pricing.
+
+        Returns:
+            Total cost in dollars, or None if insufficient data.
+        """
+        if self.input_cost is None or self.output_cost is None:
+            return None
+
+        return self.input_cost + self.output_cost
+
+    def update_token_prices(
+        self,
+        input_token_price: float,
+        output_token_price: float,
+        cached_token_price: float,
+    ) -> None:
+        """Update token prices with provided values.
+
+        Args:
+            input_token_price: Price per 1K input tokens in dollars.
+            output_token_price: Price per 1K output tokens in dollars.
+            cached_token_price: Price per 1K cached tokens in dollars.
+        """
+        self.input_token_price = input_token_price
+        self.output_token_price = output_token_price
+        self.cached_token_price = cached_token_price
+
+    @classmethod
+    def from_chat_completion_response(
+        cls,
+        response: ChatCompletion,
+        input_token_price: float = 0.0,
+        output_token_price: float = 0.0,
+        cached_token_price: float = 0.0,
+    ) -> Optional["UsageStatistics"]:
+        """Create a UsageStatistics object from a ChatCompletionChunk."""
+        if not (usage := getattr(response, "usage", None)):
+            return None
+
+        usage_statistics = cls(
+            input_token_price=input_token_price,
+            output_token_price=output_token_price,
+            cached_token_price=cached_token_price,
+        )
+
+        usage_statistics.prompt_tokens = usage.prompt_tokens
+        usage_statistics.completion_tokens = usage.completion_tokens
+        usage_statistics.total_tokens = usage.total_tokens
+        usage_statistics.model = getattr(response, "model", None)
+
+        # Extract cached tokens if available
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
+            usage_statistics.cached_prompt_tokens = getattr(
+                usage.prompt_tokens_details, "cached_tokens", None
+            )
+
+        return usage_statistics
 
     def reset(self) -> None:
         """Reset usage statistics to their default values."""
         self.prompt_tokens = None
         self.completion_tokens = None
         self.total_tokens = None
+        self.cached_prompt_tokens = None
         self.model = None
 
     def update_from_stream_chunk(self, chunk: ChatCompletionChunk) -> None:
@@ -630,14 +838,25 @@ class UsageStatistics(BaseModel):
         Args:
             chunk: The OpenAI stream chunk containing usage statistics.
         """
+        # Reset the usage statistics to their default values
+        self.reset()
+
+        # If the chunk has no usage statistics, return
         if not (usage := getattr(chunk, "usage", None)):
             return
 
+        # Update the usage statistics with the values from the chunk
         self.prompt_tokens = usage.prompt_tokens
         self.completion_tokens = usage.completion_tokens
         self.total_tokens = usage.total_tokens
         self.model = getattr(chunk, "model", None)
 
+        # Extract cached tokens if available
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
+            self.cached_prompt_tokens = getattr(
+                usage.prompt_tokens_details, "cached_tokens", None
+            )
+
 
 class SigningContext(BaseModel):
     secret: Optional[str] = Field(None)
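Put together, the new cost properties are straight per-1K-token arithmetic. A small worked sketch using the default prices from rasa/builder/config.py; the token counts are made up for illustration:

from rasa.builder.copilot.models import UsageStatistics

stats = UsageStatistics(
    prompt_tokens=1500,          # includes the cached prompt tokens
    cached_prompt_tokens=500,
    completion_tokens=200,
    input_token_price=0.002,     # per 1K non-cached input tokens
    output_token_price=0.0005,   # per 1K output tokens
    cached_token_price=0.002,    # per 1K cached input tokens
)

assert stats.non_cached_prompt_tokens == 1000          # 1500 - 500
assert stats.input_cost == 0.002 * 1.0 + 0.002 * 0.5   # non-cached + cached = 0.003
assert stats.output_cost == 0.0005 * 0.2               # 0.0001
assert abs(stats.total_cost - 0.0031) < 1e-9           # input + output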
rasa/builder/document_retrieval/inkeep_document_retrieval.py
CHANGED
@@ -17,6 +17,7 @@ from rasa.builder.document_retrieval.constants import (
 )
 from rasa.builder.document_retrieval.models import Document
 from rasa.builder.exceptions import DocumentRetrievalError
+from rasa.builder.telemetry.copilot_langfuse_telemetry import CopilotLangfuseTelemetry
 from rasa.shared.utils.io import read_json_file
 
 structlogger = structlog.get_logger()
@@ -88,6 +89,7 @@ class InKeepDocumentRetrieval:
            )
            raise e
 
+    @CopilotLangfuseTelemetry.trace_document_retrieval_generation
     async def _call_inkeep_rag_api(
         self, query: str, temperature: float, timeout: float
     ) -> ChatCompletion:
rasa/builder/download.py
CHANGED
@@ -27,7 +27,7 @@ def _get_pyproject_toml_content(project_id: str) -> str:
     version = "0.1.0"
     description = "Add your description for your Rasa bot here"
     readme = "README.md"
-    dependencies = ["rasa-pro>=3.
+    dependencies = ["rasa-pro>=3.14"]
     requires-python = ">={sys.version_info.major}.{sys.version_info.minor}"
     """
     )
rasa/builder/evaluator/__init__.py
ADDED
File without changes
rasa/builder/evaluator/constants.py
ADDED
@@ -0,0 +1,15 @@
+"""Constants for the evaluator module."""
+
+from pathlib import Path
+
+# Base directory for the rasa package
+BASE_DIR = Path(__file__).parent.parent.parent
+
+# Response classification evaluation results directory
+RESPONSE_CLASSIFICATION_EVALUATION_RESULTS_DIR = (
+    BASE_DIR / "builder" / "evaluator" / "results"
+)
+# Default output filename
+DEFAULT_RESPONSE_CLASSIFICATION_EVALUATION_TEXT_OUTPUT_FILENAME = "run_results.txt"
+# Default YAML output filename
+RESPONSE_CLASSIFICATION_EVALUATION_YAML_OUTPUT_FILENAME = "run_results.yaml"
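Since this module lives at rasa/builder/evaluator/constants.py, the three .parent hops in BASE_DIR resolve to the installed rasa/ package root, and the results directory points back into rasa/builder/evaluator/results. A quick sanity-check sketch:

from rasa.builder.evaluator.constants import (
    BASE_DIR,
    RESPONSE_CLASSIFICATION_EVALUATION_RESULTS_DIR,
)

# constants.py -> evaluator/ -> builder/ -> rasa/
print(BASE_DIR.name)                                   # "rasa"
print(RESPONSE_CLASSIFICATION_EVALUATION_RESULTS_DIR)  # .../rasa/builder/evaluator/results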
rasa/builder/evaluator/copilot_executor.py
ADDED
@@ -0,0 +1,89 @@
+"""Copilot execution utilities for evaluators.
+
+This module provides utilities for running copilot operations in evaluation contexts,
+independent of specific evaluation frameworks like Langfuse.
+"""
+
+from typing import List, Optional
+
+import structlog
+from pydantic import BaseModel
+
+from rasa.builder.config import COPILOT_HANDLER_ROLLING_BUFFER_SIZE
+from rasa.builder.copilot.models import (
+    CopilotContext,
+    CopilotGenerationContext,
+    GeneratedContent,
+    ReferenceSection,
+    ResponseCategory,
+)
+from rasa.builder.llm_service import llm_service
+
+structlogger = structlog.get_logger()
+
+
+class CopilotRunResult(BaseModel):
+    """Result from running the copilot with response handler."""
+
+    complete_response: Optional[str]
+    response_category: Optional[ResponseCategory]
+    reference_section: Optional[ReferenceSection]
+    generation_context: CopilotGenerationContext
+
+
+async def run_copilot_with_response_handler(
+    context: CopilotContext,
+) -> Optional[CopilotRunResult]:
+    """Run the copilot with response handler on the given context.
+
+    This function encapsulates the core copilot execution logic. It handles:
+    - Instantiating the copilot and response handler
+    - Generating a response and extracting the reference section from the given context
+    - Returning structured results
+
+    Args:
+        context: The copilot context to process.
+
+    Returns:
+        CopilotRunResult containing the complete response, category, and generation
+        context, or None if execution fails.
+
+    Raises:
+        Any exceptions from the copilot or response handler execution.
+    """
+    # Instantiate the copilot and response handler
+    copilot = llm_service.instantiate_copilot()
+    copilot_response_handler = llm_service.instantiate_handler(
+        COPILOT_HANDLER_ROLLING_BUFFER_SIZE
+    )
+
+    # Call the copilot to generate a response and handle it with the response
+    # handler
+    (original_stream, generation_context) = await copilot.generate_response(context)
+    intercepted_stream = copilot_response_handler.handle_response(original_stream)
+
+    # Exhaust the stream to get the complete response for evaluation
+    response_chunks: List[str] = []
+    response_category = None
+    async for chunk in intercepted_stream:
+        if not isinstance(chunk, GeneratedContent):
+            continue
+        response_chunks.append(chunk.content)
+        response_category = chunk.response_category
+
+    complete_response = "".join(response_chunks) if response_chunks else None
+
+    # Extract the reference section from the response handler
+    if generation_context.relevant_documents:
+        reference_section = copilot_response_handler.extract_references(
+            generation_context.relevant_documents
+        )
+    else:
+        reference_section = None
+
+    return CopilotRunResult(
+        complete_response=complete_response,
+        response_category=response_category,
+        reference_section=reference_section,
+        generation_context=generation_context,
+    )
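A rough sketch of how an evaluation script might drive run_copilot_with_response_handler. The CopilotContext construction below (only copilot_chat_history set) and the content-block shape are assumptions based on the docstring example in models.py, not something this diff guarantees:

import asyncio

from rasa.builder.copilot.models import CopilotContext, create_chat_message_from_dict
from rasa.builder.evaluator.copilot_executor import run_copilot_with_response_handler


async def main() -> None:
    history = [
        create_chat_message_from_dict(
            {"role": "user", "content": [{"type": "text", "text": "How do I add a flow?"}]}
        )
    ]
    # Assumption: the remaining CopilotContext fields have defaults; adjust as needed.
    context = CopilotContext(copilot_chat_history=history)

    result = await run_copilot_with_response_handler(context)
    if result is not None:
        print(result.response_category)
        print(result.complete_response)


asyncio.run(main())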